jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/iheart.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / iheart.py
1 from .common import InfoExtractor
2 from ..utils import (
3 clean_html,
4 clean_podcast_url,
5 int_or_none,
6 str_or_none,
7 )
8
9
class IHeartRadioBaseIE(InfoExtractor):
    """Helpers shared by the iHeartRadio episode and podcast extractors."""

    def _call_api(self, path, video_id, fatal=True, query=None):
        """Fetch a JSON document from the iHeart podcast API.

        `path` is appended verbatim to the API base URL; `video_id`, `fatal`
        and `query` are forwarded unchanged to `_download_json`, whose result
        is returned as-is.
        """
        endpoint = 'https://api.iheart.com/api/v3/podcast/' + path
        return self._download_json(
            endpoint, video_id, fatal=fatal, query=query)

    def _extract_episode(self, episode):
        """Return the metadata fields read from an API `episode` dict.

        Only `thumbnail`, `description`, `timestamp` and `duration` are
        produced here; identifying fields (`id`, `title`, `url`) are left to
        the caller.
        """
        thumbnail = episode.get('imageUrl')
        description = clean_html(episode.get('description'))
        # NOTE(review): the 1000 is presumably a scale that turns a
        # millisecond `startDate` into seconds -- confirm against int_or_none.
        timestamp = int_or_none(episode.get('startDate'), 1000)
        duration = int_or_none(episode.get('duration'))
        return {
            'thumbnail': thumbnail,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
        }
23
24
class IHeartRadioIE(IHeartRadioBaseIE):
    """Extractor for a single iHeartRadio podcast episode.

    Matches episode pages on iheart.com as well as the internal
    `iheartradio:<id>` scheme.
    """

    IE_NAME = 'iheartradio'
    _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
        'md5': 'c8609c92c8688dcb69d8541042b8abca',
        'info_dict': {
            'id': '70346499',
            'ext': 'mp3',
            'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
            'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
            'timestamp': 1597741200,
            'upload_date': '20200818',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode identified by `url`.

        The episode record is fetched from the `episodes/<id>` API endpoint.
        `title` and `mediaUrl` are mandatory in that record (a missing key
        raises `KeyError`); the remaining metadata is optional.
        """
        episode_id = self._match_id(url)
        response = self._call_api('episodes/' + episode_id, episode_id)
        episode = response['episode']
        # Shared optional metadata first, then the episode-specific fields,
        # so the resulting key order matches an update() on the shared dict.
        return {
            **self._extract_episode(episode),
            'id': episode_id,
            'title': episode['title'],
            'url': clean_podcast_url(episode['mediaUrl']),
        }
52
53
class IHeartRadioPodcastIE(IHeartRadioBaseIE):
    """Extractor for a whole iHeartRadio podcast, returned as a playlist."""

    IE_NAME = 'iheartradio:podcast'
    _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
    _TESTS = [
        {
            'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
            'info_dict': {
                'id': '30717896',
                'title': 'It Could Happen Here',
                'description': 'md5:5842117412a967eb0b01f8088eb663e2',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of all episodes of the podcast at `url`.

        Each entry is a `url`-type result pointing at `iheartradio:<id>` and
        handled by `IHeartRadioIE`. Episodes without a usable `id` are
        skipped.
        """
        podcast_id = self._match_id(url)
        api_path = 'podcasts/' + podcast_id
        # A very large limit is requested so the listing arrives in a single
        # response.
        episodes = self._call_api(
            api_path + '/episodes', podcast_id,
            query={'limit': 1000000000})['data']

        def iter_entries():
            for episode in episodes:
                episode_id = str_or_none(episode.get('id'))
                if episode_id:
                    yield {
                        **self._extract_episode(episode),
                        '_type': 'url',
                        'id': episode_id,
                        'title': episode.get('title'),
                        'url': 'iheartradio:' + episode_id,
                        'ie_key': IHeartRadioIE.ie_key(),
                    }

        # Materialise the entries before the podcast metadata request so the
        # listing is fully processed first.
        entries = list(iter_entries())

        # The metadata request is made with fatal=False; a falsy result falls
        # back to an empty dict, leaving title and description as None.
        podcast_meta = self._call_api(api_path, podcast_id, False) or {}

        return self.playlist_result(
            entries,
            podcast_id,
            podcast_meta.get('title'),
            podcast_meta.get('description'))