]>
Commit | Line | Data |
---|---|---|
ef12dbdc S |
1 | import itertools |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..networking.exceptions import HTTPError | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | extract_attributes, | |
8 | get_element_by_class, | |
9 | get_element_html_by_class, | |
10 | get_element_text_and_html_by_tag, | |
11 | get_elements_html_by_class, | |
12 | int_or_none, | |
13 | join_nonempty, | |
14 | try_call, | |
15 | unified_strdate, | |
16 | update_url, | |
17 | urljoin | |
18 | ) | |
19 | from ..utils.traversal import traverse_obj | |
20 | ||
21 | ||
class RadioComercialIE(InfoExtractor):
    """Extractor for a single podcast episode page on radiocomercial.pt.

    The URL carries the season number (optionally prefixed with ``t``) and
    the episode slug, which is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
        'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
        'info_dict': {
            'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
            'ext': 'mp3',
            'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
            'release_date': '20231025',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 6',
            'season_number': 6,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
        'md5': '47e96c273aef96a8eb160cd6cf46d782',
        'info_dict': {
            'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
            'ext': 'mp3',
            'title': 'Convença-me num minuto que os lobisomens existem',
            'release_date': '20231026',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 3',
            'season_number': 3,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
        'md5': '69be64255420fec23b7259955d771e54',
        'info_dict': {
            'id': 'o-desastre-de-aviao',
            'ext': 'mp3',
            'title': 'O desastre de avião',
            'description': 'md5:8a82beeb372641614772baab7246245f',
            'release_date': '20231101',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2',
            'season_number': 2,
        },
        'params': {
            # the md5 of the download is inconsistent, so skip downloading
            'skip_download': True,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
        'md5': '91d32d4d4b1407272068b102730fc9fa',
        'info_dict': {
            'id': 't-n-t-29-de-outubro',
            'ext': 'mp3',
            'title': 'T.N.T 29 de outubro',
            'release_date': '20231029',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2023',
            'season_number': 2023,
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and build its info dict.

        The id and season come from the URL; title, description, release
        date, thumbnail and the audio URL are scraped from the page HTML.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        season = mobj.group('season')
        webpage = self._download_webpage(url, video_id)

        # Fields are computed in the same order as they appear in the result.
        title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage, default=None)

        # The date element is looked up inside the 'descriptions' element;
        # fall back to an empty string when that element is missing.
        descriptions_html = get_element_html_by_class('descriptions', webpage) or ''
        release_date = unified_strdate(get_element_by_class('date', descriptions_html))

        thumbnail = self._og_search_thumbnail(webpage)
        season_number = int_or_none(season)

        # The media URL is the href attribute of the 'audiofile' element.
        audio_element = get_element_html_by_class('audiofile', webpage) or ''
        audio_url = extract_attributes(audio_element).get('href')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'release_date': release_date,
            'thumbnail': thumbnail,
            'season_number': season_number,
            'url': audio_url,
        }
92 | ||
93 | ||
class RadioComercialPlaylistIE(InfoExtractor):
    """Playlist extractor for radiocomercial.pt podcast listing pages.

    Matches a podcast URL, optionally followed by a season segment
    (digits, optionally prefixed with ``t``) and an optional trailing slash.
    """

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
        'info_dict': {
            'id': 'convenca-me-num-minuto_t3',
            'title': 'Convença-me num Minuto - Temporada 3',
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
        'info_dict': {
            'id': 'o-homem-que-mordeu-o-cao',
            'title': 'O Homem Que Mordeu o Cão',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
        'info_dict': {
            'id': 'as-minhas-coisas-favoritas',
            'title': 'As Minhas Coisas Favoritas',
        },
        'playlist_mincount': 131,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
        'info_dict': {
            'id': 'tnt-todos-no-top_t2023',
            'title': 'TNT - Todos No Top - Temporada 2023',
        },
        'playlist_mincount': 39,
    }]

    def _entries(self, url, playlist_id):
        """Yield episode URLs found on the numbered listing pages of *url*.

        Pages are requested as ``<url>/1``, ``<url>/2``, ... and iteration
        stops when a request fails with HTTP 404 or a page contains no
        ``tm-ouvir-podcast`` elements. Any other download error is re-raised.
        Only links that RadioComercialIE considers suitable are yielded.
        """
        # _VALID_URL tolerates a trailing slash; strip it so page URLs do not
        # end up with an empty path segment (".../podcast//1").
        page_base = url.rstrip('/')

        for page in itertools.count(1):
            try:
                webpage = self._download_webpage(
                    f'{page_base}/{page}', playlist_id, f'Downloading page {page}')
            except ExtractorError as e:
                # A 404 marks the end of the pagination.
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    break
                raise

            episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
            if not episodes:
                break
            for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')):
                # Resolve against the original URL so link resolution is
                # unaffected by the trailing-slash normalisation above.
                episode_url = urljoin(url, url_path)
                if RadioComercialIE.suitable(episode_url):
                    yield episode_url

    def _real_extract(self, url):
        """Build the playlist result for a podcast (or one of its seasons).

        The playlist id is the podcast slug, joined with the season by
        ``_t`` when a season is present in the URL. The title is taken from
        the first ``<h1>`` of the page, with `` - Temporada <season>``
        appended when a season is present.
        """
        podcast, season = self._match_valid_url(url).group('id', 'season')
        playlist_id = join_nonempty(podcast, season, delim='_t')
        # Drop query string and fragment before downloading and paginating.
        url = update_url(url, query=None, fragment=None)
        webpage = self._download_webpage(url, playlist_id)

        # try_call guards against the heading lookup failing.
        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
        # If the heading text is exactly the season value, do not repeat it.
        title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')

        return self.playlist_from_matches(
            self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)