]>
Commit | Line | Data |
---|---|---|
ef12dbdc S |
1 | import itertools |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..networking.exceptions import HTTPError | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | extract_attributes, | |
8 | get_element_by_class, | |
9 | get_element_html_by_class, | |
10 | get_element_text_and_html_by_tag, | |
11 | get_elements_html_by_class, | |
12 | int_or_none, | |
13 | join_nonempty, | |
14 | try_call, | |
15 | unified_strdate, | |
16 | update_url, | |
17 | urljoin | |
18 | ) | |
19 | from ..utils.traversal import traverse_obj | |
20 | ||
21 | ||
class RadioComercialIE(InfoExtractor):
    """Extractor for a single podcast episode page on radiocomercial.pt.

    The URL carries the season (optionally prefixed with ``t``) and the
    episode slug; the slug is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
        'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
        'info_dict': {
            'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
            'ext': 'mp3',
            'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
            'release_date': '20231025',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 6',
            'season_number': 6,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
        'md5': '47e96c273aef96a8eb160cd6cf46d782',
        'info_dict': {
            'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
            'ext': 'mp3',
            'title': 'Convença-me num minuto que os lobisomens existem',
            'release_date': '20231026',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 3',
            'season_number': 3,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
        'md5': '69be64255420fec23b7259955d771e54',
        'info_dict': {
            'id': 'o-desastre-de-aviao',
            'ext': 'mp3',
            'title': 'O desastre de avião',
            'description': 'md5:8a82beeb372641614772baab7246245f',
            'release_date': '20231101',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2',
            'season_number': 2,
        },
        'params': {
            # inconsistent md5
            'skip_download': True,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
        'md5': '91d32d4d4b1407272068b102730fc9fa',
        'info_dict': {
            'id': 't-n-t-29-de-outubro',
            'ext': 'mp3',
            'title': 'T.N.T 29 de outubro',
            'release_date': '20231029',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2023',
            'season_number': 2023,
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and build the info dict from its HTML.

        The audio URL is the ``href`` of the element with class ``audiofile``;
        the release date is read from the ``date`` element nested inside the
        ``descriptions`` element. Both lookups fall back to an empty string so
        a missing element yields ``None`` instead of raising.
        """
        video_id, season = self._match_valid_url(url).group('id', 'season')
        webpage = self._download_webpage(url, video_id)

        descriptions_html = get_element_html_by_class('descriptions', webpage) or ''
        audio_attrs = extract_attributes(get_element_html_by_class('audiofile', webpage) or '')

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'release_date': unified_strdate(get_element_by_class('date', descriptions_html)),
            'thumbnail': self._og_search_thumbnail(webpage),
            # The number parsed from the URL is the season *number*; the
            # 'season' field is reserved for the season's title string.
            'season_number': int_or_none(season),
            'url': audio_attrs.get('href'),
        }
88 | ||
89 | ||
class RadioComercialPlaylistIE(InfoExtractor):
    """Extractor for a podcast (optionally a single season) on radiocomercial.pt.

    Episode links are collected from the numbered listing pages and handed to
    RadioComercialIE.
    """

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
        'info_dict': {
            'id': 'convenca-me-num-minuto_t3',
            'title': 'Convença-me num Minuto - Temporada 3',
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
        'info_dict': {
            'id': 'o-homem-que-mordeu-o-cao',
            'title': 'O Homem Que Mordeu o Cão',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
        'info_dict': {
            'id': 'as-minhas-coisas-favoritas',
            'title': 'As Minhas Coisas Favoritas',
        },
        'playlist_mincount': 131,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
        'info_dict': {
            'id': 'tnt-todos-no-top_t2023',
            'title': 'TNT - Todos No Top - Temporada 2023',
        },
        'playlist_mincount': 39,
    }]

    def _entries(self, url, playlist_id):
        """Yield episode URLs found on the paginated listing ``<url>/<page>``.

        Pagination stops at the first page that returns HTTP 404 or contains
        no elements with class ``tm-ouvir-podcast``. Any other download error
        is re-raised.
        """
        # _VALID_URL accepts a trailing slash; strip it so the page URL does
        # not end up as '<podcast>//<page>'.
        base_url = url.rstrip('/')

        for page in itertools.count(1):
            try:
                webpage = self._download_webpage(
                    f'{base_url}/{page}', playlist_id, f'Downloading page {page}')
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    break
                raise

            episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
            if not episodes:
                break
            for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')):
                episode_url = urljoin(url, url_path)
                # Only pass on links that the episode extractor can handle.
                if RadioComercialIE.suitable(episode_url):
                    yield episode_url

    def _real_extract(self, url):
        """Build the playlist for a podcast or podcast season.

        The playlist id is ``<podcast>`` or ``<podcast>_t<season>``; the title
        is the text of the page's first ``<h1>``, suffixed with
        `` - Temporada <season>`` when a season is present in the URL.
        """
        podcast, season = self._match_valid_url(url).group('id', 'season')
        playlist_id = join_nonempty(podcast, season, delim='_t')
        # Drop query string and fragment so page numbers can be appended.
        url = update_url(url, query=None, fragment=None)
        webpage = self._download_webpage(url, playlist_id)

        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
        title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')

        return self.playlist_from_matches(
            self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)