]>
Commit | Line | Data |
---|---|---|
1e0daeb3 | 1 | import re |
1e0daeb3 | 2 | |
3 | from .common import InfoExtractor | |
d5d1df8a | 4 | from ..utils import determine_ext, js_to_json, mimetype2ext, traverse_obj |
1e0daeb3 | 5 | |
6 | ||
class TV24UAVideoIE(InfoExtractor):
    """Extractor for the 24tv.ua embeddable player (``/news/showPlayer.do``).

    The player page assigns a JavaScript array to ``vPlayConfig.sources``;
    each entry is read for a ``src`` URL and an optional ``type`` MIME type.
    Entries whose type maps to ``m3u8`` are expanded into HLS formats (and
    subtitles); every other entry is added as a single direct format.
    """

    # The numeric ``objectId`` query parameter is the video id; it may appear
    # anywhere in the query string.
    _VALID_URL = r'https?://24tv\.ua/news/showPlayer\.do.*?(?:\?|&)objectId=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
    IE_NAME = '24tv.ua'
    _TESTS = [{
        'url': 'https://24tv.ua/news/showPlayer.do?objectId=2074790&videoUrl=2022/07/2074790&w=640&h=360',
        'info_dict': {
            'id': '2074790',
            'ext': 'mp4',
            'title': 'У Харкові ворожа ракета прилетіла в будинок, де слухали пісні про "офіцерів-росіян"',
            'thumbnail': r're:^https?://.*\.jpe?g',
        }
    }, {
        'url': 'https://24tv.ua/news/showPlayer.do?videoUrl=2022/07/2074790&objectId=2074790&w=640&h=360',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [
        {
            # iframe embed created from share menu.
            'url': 'data:text/html,%3Ciframe%20src=%22https://24tv.ua/news/showPlayer.do?objectId=1886193&videoUrl'
                   '=2022/03/1886193&w=640&h=360%22%20width=%22640%22%20height=%22360%22%20frameborder=%220%22'
                   '%20scrolling=%22no%22%3E%3C/iframe%3E',
            'info_dict': {
                'id': '1886193',
                'ext': 'mp4',
                'title': 'Росіяни руйнують Бородянку на Київщині та стріляють з літаків по мешканцях: шокуючі фото',
                'thumbnail': r're:^https?://.*\.jpe?g',
            }
        },
        {
            'url': 'https://24tv.ua/vipalyuyut-nashi-mista-sela-dsns-pokazali-motoroshni-naslidki_n1883966',
            'info_dict': {
                'id': '1883966',
                'ext': 'mp4',
                'title': 'Випалюють наші міста та села, – моторошні наслідки обстрілів на Чернігівщині',
                'thumbnail': r're:^https?://.*\.jpe?g',
            },
            'params': {'allowed_extractors': ['Generic', '24tv.ua']},
        }
    ]

    def _real_extract(self, url):
        """Download the player page and build the info dict for the video.

        Returns a dict with ``id``, ``formats``, ``subtitles``, ``thumbnail``,
        ``title`` and ``description``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        formats = []
        subtitles = {}
        # A page may contain more than one ``vPlayConfig.sources = [...]``
        # assignment; collect formats from all of them.
        for j in re.findall(r'vPlayConfig\.sources\s*=\s*(?P<json>\[{\s*(?s:.+?)\s*}])', webpage):
            # Parsing is best-effort: an unparsable array is skipped (fatal=False).
            sources = self._parse_json(
                j, video_id, fatal=False, ignore_extra=True,
                transform_source=js_to_json, errnote='') or []
            for source in sources:
                # Skip entries without a usable URL instead of aborting the
                # whole extraction on a KeyError/TypeError.
                src_url = traverse_obj(source, 'src')
                if not src_url:
                    continue
                if mimetype2ext(traverse_obj(source, 'type')) == 'm3u8':
                    f, s = self._extract_m3u8_formats_and_subtitles(src_url, video_id)
                    formats.extend(f)
                    # Merge *into* the accumulator; without ``target`` the
                    # merged result is returned and would be discarded here.
                    self._merge_subtitles(s, target=subtitles)
                else:
                    formats.append({
                        'url': src_url,
                        'ext': determine_ext(src_url),
                    })
        # Prefer the poster from the player config; the return below falls
        # back to the OpenGraph thumbnail when it is absent.
        thumbnail = traverse_obj(
            self._search_json(
                r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail',
                video_id, default=None, transform_source=js_to_json), 'poster')
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail or self._og_search_thumbnail(webpage),
            'title': self._generic_title('', webpage),
            'description': self._og_search_description(webpage, default=None),
        }