]>
Commit | Line | Data |
---|---|---|
68a9a450 | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | js_to_json, | |
5 | smuggle_url, | |
6 | str_or_none, | |
7 | traverse_obj, | |
8 | unescapeHTML, | |
9 | ) | |
10 | ||
11 | ||
class GeniusIE(InfoExtractor):
    """Extractor for genius.com video pages (``/videos/...``) and articles (``/a/...``).

    The page embeds a JSON blob in a ``<meta ... itemprop="page_data">`` tag;
    the Brightcove video ID is looked up in that blob and extraction is then
    handed over to the ``BrightcoveNew`` extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
    _TESTS = [{
        'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
        'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
        'info_dict': {
            'id': '6313303597112',
            'ext': 'mp4',
            'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”',
            'description': 'md5:bc15e00342c537c0039d414423ae5752',
            'tags': 'count:1',
            'uploader_id': '4863540648001',
            'duration': 388.416,
            'upload_date': '20221005',
            'timestamp': 1664982341,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi',
        'md5': 'b8ed87a5efd1473bd027c20a969d4060',
        'info_dict': {
            'id': '6271792014001',
            'ext': 'mp4',
            'title': 'md5:c6355f7fa8a70bc86492a3963919fc15',
            'description': 'md5:1774638c31548b31b037c09e9b821393',
            'tags': 'count:3',
            'uploader_id': '4863540648001',
            'duration': 2685.099,
            'upload_date': '20210909',
            'timestamp': 1631209167,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
        'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
        'info_dict': {
            'id': '6321509903112',
            'ext': 'mp4',
            'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
            'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
            'tags': ['song id: 5457554'],
            'uploader_id': '4863540648001',
            'duration': 361.813,
            'upload_date': '20230301',
            'timestamp': 1677703908,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Resolve a Genius video/article URL to a ``BrightcoveNew`` url result.

        Raises ExtractorError when no Brightcove video ID is present in the
        page metadata; the error is marked as expected for article URLs.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        # The 'article' group only participates in the match for /a/ URLs
        is_article = mobj.group('article')

        webpage = self._download_webpage(url, display_id)

        # page_data is HTML-escaped JSON stored in a <meta> tag's content attribute
        metadata = self._search_json(
            r'<meta content="', webpage, 'metadata', display_id,
            end_pattern=r'"\s+itemprop="page_data"', transform_source=unescapeHTML)

        # Search each article media item and the top-level video object (or the
        # metadata root) for either a direct provider_id or a dfp_kv entry named
        # brightcove_video_id; get_all=False keeps only the first hit
        video_id = traverse_obj(metadata, (
            (('article', 'media', ...), ('video', None)),
            ('provider_id', ('dfp_kv', lambda _, v: v['name'] == 'brightcove_video_id', 'values', ...))),
            get_all=False)
        if not video_id:
            # Not all article pages have videos, expect the error
            raise ExtractorError('Brightcove video ID not found in webpage', expected=bool(is_article))

        # APP_CONFIG is optional; fall back to hard-coded IDs when it is missing
        fallback_account_id = '4863540648001'
        fallback_player_id = 'S1ZcmcOC1x'
        player_id_keys = (
            'brightcove_standard_web_player_id',
            'brightcove_standard_no_autoplay_web_player_id',
            'brightcove_modal_web_player_id',
            'brightcove_song_story_web_player_id',
        )

        config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
        player_id = traverse_obj(config, *player_id_keys, default=fallback_player_id)
        account_id = config.get('brightcove_account_id', fallback_account_id)

        player_url = f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}'
        # Pass the originating page along as the referrer for the Brightcove extractor
        return self.url_result(smuggle_url(player_url, {'referrer': url}), 'BrightcoveNew', video_id)
86 | ||
87 | ||
class GeniusLyricsIE(InfoExtractor):
    """Extractor for genius.com ``...-lyrics`` song pages.

    Produces a playlist of the video/audio media attached to the song, skipping
    Spotify embeds.
    """

    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
    _TESTS = [{
        'url': 'https://genius.com/Lil-baby-heyy-lyrics',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '8454545',
            'title': 'Heyy',
            'description': 'Heyy by Lil Baby',
        },
    }, {
        'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '36239',
            'title': 'Two Dope Boyz (In a Cadillac)',
            'description': 'Two Dope Boyz (In a Cadillac) by OutKast',
        },
    }, {
        'url': 'https://genius.com/Playboi-carti-rip-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '3710582',
            'title': 'R.I.P.',
            'description': 'R.I.P. by Playboi Carti',
        },
    }]

    def _real_extract(self, url):
        """Build a playlist result from the media entries of a lyrics page.

        Raises ExtractorError when the song id cannot be found in the page state.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The preloaded state is a quoted string handed to JSON.parse(...), so it
        # is decoded twice: first to recover the string, then to parse its JSON
        json_string = self._search_json(
            r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string',
            display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'')
        song_info = self._parse_json(json_string, display_id)

        song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song')))
        if not song_id:
            raise ExtractorError('Song id not found in webpage')

        def tracking_value(key, fallback):
            # First 'value' among the trackingData entries whose 'key' matches
            return traverse_obj(
                song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == key, 'value'),
                get_all=False, default=fallback)

        title = tracking_value('Title', 'untitled')
        artist = tracking_value('Primary Artist', 'unknown artist')

        media = traverse_obj(
            song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[])

        entries = []
        for item in media:
            # Only video/audio items that actually carry a URL are of interest
            if item.get('type') not in ('video', 'audio') or not item.get('url'):
                continue
            if item.get('provider') == 'spotify':
                self.to_screen(f'{song_id}: Skipping Spotify audio embed')
                continue
            entries.append(self.url_result(item['url']))

        return self.playlist_result(entries, song_id, title, f'{title} by {artist}')