]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/megaphone.py
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968)
[yt-dlp.git] / yt_dlp / extractor / megaphone.py
CommitLineData
24e966e8
PH
1from .common import InfoExtractor
2from ..utils import js_to_json
3
4
class MegaphoneIE(InfoExtractor):
    """Extractor for episodes served by the megaphone.fm embedded player.

    The player page is scraped for Open Graph ``audio:*`` properties and for
    an inline ``var episode = {...};`` JavaScript object, which supplies the
    media URL and the duration.
    """

    IE_NAME = 'megaphone.fm'
    IE_DESC = 'megaphone.fm embedded players'
    _VALID_URL = r'https?://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
    # Matches <iframe> tags whose src points at a player URL.
    _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
    _TEST = {
        'url': 'https://player.megaphone.fm/GLT9749789991',
        'md5': '4816a0de523eb3e972dc0dda2c191f96',
        'info_dict': {
            'id': 'GLT9749789991',
            'ext': 'mp3',
            'title': '#97 What Kind Of Idiot Gets Phished?',
            'thumbnail': r're:^https://.*\.png.*$',
            'duration': 1998.36,
            'creators': ['Reply All'],
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single player URL.

        The title and the media URL are required: extraction fails if
        either cannot be found. Artist, thumbnail and duration are treated
        as optional and come back as ``None`` when the page omits them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_property('audio:title', webpage)
        # The artist is optional metadata: a missing tag must not abort the
        # extraction, otherwise the `if author` guard below never applies.
        author = self._og_search_property('audio:artist', webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage)

        # The episode object is a JavaScript literal rather than strict
        # JSON, so it is normalised with js_to_json before parsing.
        episode_json = self._search_regex(
            r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
        episode_data = self._parse_json(episode_json, video_id, js_to_json)
        # mediaUrl may be protocol-relative ("//host/..."); force https.
        video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')

        return {
            'id': video_id,
            'thumbnail': thumbnail,
            'title': title,
            'creators': [author] if author else None,
            # Optional field: tolerate episode objects without a duration.
            'duration': episode_data.get('duration'),
            'formats': [{
                'url': video_url,
            }],
        }