jfr.im git - yt-dlp.git/commitdiff
[extractor/nekohacker] Add extractor (#7003)
author: hasezoey <redacted>
Mon, 29 May 2023 10:52:01 +0000 (12:52 +0200)
committer: GitHub <redacted>
Mon, 29 May 2023 10:52:01 +0000 (10:52 +0000)
Authored by: hasezoey

yt_dlp/extractor/_extractors.py
yt_dlp/extractor/nekohacker.py [new file with mode: 0644]

index 31af6bd3f896ed30f673f00d61dec1a0df53babe..f9fa84c43bab773d430f034224940cdc520d3a28 100644 (file)
     NebulaSubscriptionsIE,
     NebulaChannelIE,
 )
+from .nekohacker import NekoHackerIE
 from .nerdcubed import NerdCubedFeedIE
 from .netzkino import NetzkinoIE
 from .neteasemusic import (
diff --git a/yt_dlp/extractor/nekohacker.py b/yt_dlp/extractor/nekohacker.py
new file mode 100644 (file)
index 0000000..e10ffe9
--- /dev/null
@@ -0,0 +1,217 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    extract_attributes,
+    get_element_by_class,
+    get_element_text_and_html_by_tag,
+    parse_duration,
+    traverse_obj,
+    try_call,
+    url_or_none,
+)
+
+
+class NekoHackerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
+    _TESTS = [{
+        'url': 'https://nekohacker.com/nekoverse/',
+        'info_dict': {
+            'id': 'nekoverse',
+            'title': 'Nekoverse',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
+                'md5': '44223701ebedba0467ebda4cc07fb3aa',
+                'info_dict': {
+                    'id': '1712',
+                    'ext': 'mp3',
+                    'title': 'Spaceship',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Spaceship',
+                    'track_number': 1,
+                    'duration': 195.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
+                'md5': '8f853c71719389d32bbbd3f1a87b3f08',
+                'info_dict': {
+                    'id': '1713',
+                    'ext': 'mp3',
+                    'title': 'City Runner',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'City Runner',
+                    'track_number': 2,
+                    'duration': 148.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
+                'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
+                'info_dict': {
+                    'id': '1714',
+                    'ext': 'mp3',
+                    'title': 'Nature Talk',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Nature Talk',
+                    'track_number': 3,
+                    'duration': 174.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
+                'md5': 'd8e59a48061764e50d92386a294abd50',
+                'info_dict': {
+                    'id': '1715',
+                    'ext': 'mp3',
+                    'title': 'Crystal World',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Crystal World',
+                    'track_number': 4,
+                    'duration': 199.0
+                }
+            }
+        ]
+    }, {
+        'url': 'https://nekohacker.com/susume/',
+        'info_dict': {
+            'id': 'susume',
+            'title': '進め!むじなカンパニー',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
+                'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
+                'info_dict': {
+                    'id': '711',
+                    'ext': 'mp3',
+                    'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'track_number': 1,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
+                'md5': '028803f70241df512b7764e73396fdd1',
+                'info_dict': {
+                    'id': '709',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'track_number': 2,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
+                'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
+                'info_dict': {
+                    'id': '710',
+                    'ext': 'mp3',
+                    'title': '進め!むじなカンパニー (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': '進め!むじなカンパニー (instrumental)',
+                    'track_number': 3,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
+                'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
+                'info_dict': {
+                    'id': '712',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ (instrumental)',
+                    'track_number': 4,
+                    'duration': None
+                }
+            }
+        ]
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+        playlist = get_element_by_class('playlist', webpage)
+
+        if not playlist:
+            iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
+            iframe_src = url_or_none(extract_attributes(iframe).get('src'))
+            if not iframe_src:
+                raise ExtractorError('No playlist or embed found in webpage')
+            elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
+                raise ExtractorError('Spotify embeds are not supported', expected=True)
+            return self.url_result(url, 'Generic')
+
+        entries = []
+        for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
+            entry = traverse_obj(extract_attributes(track), {
+                'url': ('data-audiopath', {url_or_none}),
+                'ext': ('data-audiopath', {determine_ext}),
+                'id': 'data-trackid',
+                'title': 'data-tracktitle',
+                'track': 'data-tracktitle',
+                'album': 'data-albumtitle',
+                'duration': ('data-tracktime', {parse_duration}),
+                'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
+                'thumbnail': ('data-albumart', {url_or_none}),
+            })
+            entries.append({
+                **entry,
+                'track_number': track_number,
+                'artist': 'Neko Hacker',
+                'vcodec': 'none',
+                'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
+            })
+
+        return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))