]>
Commit | Line | Data |
---|---|---|
27231526 ZM |
1 | import urllib.parse |
2 | ||
3 | from .common import InfoExtractor | |
3d2623a8 | 4 | from ..networking import HEADRequest |
27231526 | 5 | from ..utils import ( |
27231526 ZM |
6 | ExtractorError, |
7 | determine_ext, | |
66587603 | 8 | make_archive_id, |
27231526 | 9 | scale_thumbnails_to_max_format_width, |
27231526 ZM |
10 | ) |
11 | ||
12 | ||
class AntennaBaseIE(InfoExtractor):
    """Common helper for extractors that read the site's player JSON API.

    Subclasses are expected to define ``_API_PATH`` (it is read here but not
    set on this class).
    """

    def _download_and_extract_api_data(self, video_id, netloc, cid=None):
        """Fetch the player JSON for a video and turn it into an info dict.

        The API URL is built from the current HTTP scheme, ``netloc`` and
        ``self._API_PATH``; the request carries ``cid`` as its query
        parameter, using ``video_id`` when ``cid`` is falsy.

        Returns a dict with the keys ``id``, ``title``, ``thumbnails``,
        ``formats`` and ``subtitles``.

        Raises ExtractorError when the API response has no ``url`` entry.
        """
        api_url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
        info = self._download_json(api_url, video_id, query={'cid': cid or video_id})

        source_url = info.get('url')
        if not source_url:
            raise ExtractorError(f'No source found for {video_id}')

        ext = determine_ext(source_url)
        if ext != 'm3u8':
            # Direct media file: expose it as a single format, no subtitles.
            formats, subs = [{'url': source_url, 'format_id': ext}], {}
        else:
            formats, subs = self._extract_m3u8_formats_and_subtitles(source_url, video_id, 'mp4')

        thumbnails = []
        if info.get('thumb'):
            # The regex targets the numeric path segment after /imgHandler/.
            thumbnails = scale_thumbnails_to_max_format_width(
                formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')

        return {
            'id': video_id,
            'title': info.get('title'),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subs,
        }
35 | ||
36 | ||
66587603 SN |
class AntennaGrWatchIE(AntennaBaseIE):
    """Extractor for ``/watch/<id>/`` pages on antenna.gr and ant1news.gr."""

    IE_NAME = 'antenna:watch'
    IE_DESC = 'antenna.gr and ant1news.gr videos'
    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
    _API_PATH = '/templates/data/player'

    _TESTS = [{
        'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
        'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
        'info_dict': {
            'id': '1506168',
            'ext': 'mp4',
            'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
            'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
            'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
        },
    }, {
        'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
        'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
        'info_dict': {
            'id': '1643812',
            'ext': 'mp4',
            'format_id': 'mp4',
            'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
            'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a watch page.

        The page itself is downloaded for its OpenGraph description; the
        media data comes from the player API on the same host as the URL.
        """
        mobj = self._match_valid_url(url)
        video_id, netloc = mobj.group('id', 'netloc')

        webpage = self._download_webpage(url, video_id)
        info = self._download_and_extract_api_data(video_id, netloc)
        info.update({
            'description': self._og_search_description(webpage, default=None),
            # Archive ID under the 'Ant1NewsGrWatch' extractor key.
            '_old_archive_ids': [make_archive_id('Ant1NewsGrWatch', video_id)],
        })
        return info
72 | ||
73 | ||
class Ant1NewsGrArticleIE(AntennaBaseIE):
    """Extractor for ant1news.gr article pages that embed player iframes."""

    IE_NAME = 'ant1newsgr:article'
    IE_DESC = 'ant1news.gr articles'
    _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'

    _TESTS = [{
        'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
        'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
        'info_dict': {
            'id': '_xvg/m_cmbatw=',
            'ext': 'mp4',
            'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
            'timestamp': 1666166520,
            'upload_date': '20221019',
            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
        },
    }, {
        'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
        'info_dict': {
            'id': '620286',
            'title': 'md5:91fe569e952e4d146485740ae927662b',
        },
        'playlist_mincount': 2,
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the player embeds found in an article page.

        Title and timestamp are taken from the page's NewsArticle JSON-LD.

        Raises an expected ExtractorError when the page has no embeds.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')

        embed_urls = [*Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)]
        if embed_urls:
            # Each entry is delegated to the embed extractor; the article's
            # timestamp is passed along with every entry.
            entry_kwargs = {'url_transparent': True, 'timestamp': info.get('timestamp')}
            return self.playlist_from_matches(
                embed_urls, video_id, info.get('title'),
                ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs=entry_kwargs)

        raise ExtractorError(f'no videos found for {video_id}', expected=True)
112 | ||
113 | ||
class Ant1NewsGrEmbedIE(AntennaBaseIE):
    """Extractor for the embeddable player pages (``/templates/pages/player``)."""

    IE_NAME = 'ant1newsgr:embed'
    IE_DESC = 'ant1news.gr embedded videos'
    _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
    _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
    _API_PATH = '/templates/data/jsonPlayer'

    _TESTS = [{
        'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
        'md5': 'dfc58c3a11a5a9aad2ba316ed447def3',
        'info_dict': {
            'id': '3f_li_c_az_jw_y_u=',
            'ext': 'mp4',
            'title': 'md5:a30c93332455f53e1e84ae0724f0adf7',
            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve the canonical player URL and extract via the JSON API.

        A HEAD request is issued for ``url`` and the URL reported by the
        response (presumably the post-redirect location - confirm against
        the networking layer) supplies the host and the ``cid`` used for the
        API call.
        """
        video_id = self._match_id(url)

        canonical_url = self._request_webpage(
            HEADRequest(url), video_id,
            note='Resolve canonical player URL',
            errnote='Could not resolve canonical player URL').url
        canonical = urllib.parse.urlparse(canonical_url)

        # parse_qs drops parameters that are absent (or blank), so indexing
        # ['cid'] directly would raise a bare KeyError. Passing None instead
        # lets the base helper fall back to video_id for the API query.
        cid = urllib.parse.parse_qs(canonical.query).get('cid', [None])[0]

        return self._download_and_extract_api_data(video_id, canonical.netloc, cid=cid)