]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/wordpress.py
[extractor/common] Fix `json_ld` type checks (#5145)
[yt-dlp.git] / yt_dlp / extractor / wordpress.py
CommitLineData
c9eba807 1from .common import InfoExtractor
2from ..utils import (
3 get_elements_by_class,
4 int_or_none,
5 parse_duration,
6 traverse_obj,
7)
8
9
10# https://codex.wordpress.org/Playlist_Shortcode
class WordpressPlaylistEmbedIE(InfoExtractor):
    # Extracts playlists embedded in a page as elements with the
    # "wp-playlist-script" class (WordPress playlist shortcode and
    # look-alikes such as wpse-playlist, see the first test below).
    # NOTE(review): `_VALID_URL = False` presumably means this extractor is
    # never matched against a URL directly and only runs through
    # `_extract_from_webpage` - confirm against InfoExtractor.
    _VALID_URL = False
    IE_NAME = 'wordpress:playlist'
    _WEBPAGE_TESTS = [{
        # 5 WordPress playlists. This is using wpse-playlist, which is similar.
        # See: https://github.com/birgire/wpse-playlist
        'url': 'https://xlino.com/wordpress-playlist-shortcode-with-external-audio-or-video-files/',
        'info_dict': {
            'id': 'wordpress-playlist-shortcode-with-external-audio-or-video-files',
            'title': 'WordPress: Playlist shortcode with external audio or video files – Birgir Erlendsson (birgire)',
            'age_limit': 0,
        },
        'playlist_count': 5,
    }, {
        'url': 'https://pianoadventures.com/products/piano-adventures-level-1-lesson-book-enhanced-cd/',
        'info_dict': {
            'id': 'piano-adventures-level-1-lesson-book-enhanced-cd-wp-playlist-1',
            'title': 'Wordpress Playlist',
            'thumbnail': 'https://pianoadventures.com/wp-content/uploads/sites/13/2022/01/CD1002cover.jpg',
            'age_limit': 0,
        },
        'playlist': [{
            'info_dict': {
                'id': 'CD1002-21',
                'ext': 'mp3',
                'title': '21 Half-Time Show',
                'thumbnail': 'https://pianoadventures.com/wp-content/plugins/media-library-assistant/images/crystal/audio.png',
                'album': 'Piano Adventures Level 1 Lesson Book (2nd Edition)',
                'genre': 'Classical',
                'duration': 49.0,
                'artist': 'Nancy and Randall Faber',
                'description': 'md5:a9f8e9aeabbd2912bc13cc0fab1a4ce8',
            }
        }],
        'playlist_count': 6,
        'params': {'skip_download': True}
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield one playlist result per "wp-playlist-script" element in `webpage`.

        Each element's content is parsed as JSON; elements that fail to parse
        or parse to something empty are skipped. Every dict found under the
        "tracks" key becomes one playlist entry. Tracks without a usable
        "src" are skipped, because both the entry id and its URL are derived
        from that field.

        @param url      URL of the page; used to derive the playlist ids
        @param webpage  HTML of the page to scan
        """
        page_id = self._generic_id(url)

        # class should always be "wp-playlist-script"
        # See: https://core.trac.wordpress.org/browser/trunk/src/wp-includes/media.php#L2930
        # The index counts every matching element, including skipped ones,
        # so playlist ids are 1-based positions among all such elements.
        for index, script in enumerate(get_elements_by_class('wp-playlist-script', webpage), start=1):
            playlist_json = self._parse_json(
                script, page_id, fatal=False, ignore_extra=True, errnote='') or {}
            if not playlist_json:
                continue

            entries = []
            for track in traverse_obj(playlist_json, ('tracks', ...), expected_type=dict):
                src = track.get('src')
                if not src or not isinstance(src, str):
                    # Without a source there is nothing to download, and
                    # indexing track['src'] would abort the whole playlist.
                    continue
                entries.append({
                    'id': self._generic_id(src),
                    'title': track.get('title'),
                    'url': src,
                    'thumbnail': traverse_obj(track, ('thumb', 'src')),
                    'album': traverse_obj(track, ('meta', 'album')),
                    'artist': traverse_obj(track, ('meta', 'artist')),
                    'genre': traverse_obj(track, ('meta', 'genre')),
                    'duration': parse_duration(traverse_obj(track, ('meta', 'length_formatted'))),
                    'description': track.get('description'),
                    'height': int_or_none(traverse_obj(track, ('dimensions', 'original', 'height'))),
                    'width': int_or_none(traverse_obj(track, ('dimensions', 'original', 'width'))),
                })

            yield self.playlist_result(entries, f'{page_id}-wp-playlist-{index}', 'Wordpress Playlist')