]>
Commit | Line | Data |
---|---|---|
1e0daeb3 | 1 | import re |
1e0daeb3 | 2 | |
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | determine_ext, | |
1e0daeb3 | 6 | js_to_json, |
7 | mimetype2ext, | |
1e0daeb3 | 8 | traverse_obj, |
9 | ) | |
10 | ||
11 | ||
class TV24UAVideoIE(InfoExtractor):
    """Extractor for 24tv.ua embedded player pages (``showPlayer.do``).

    The player page assigns a JavaScript array to ``vPlayConfig.sources``;
    each entry is expected to carry a ``src`` URL and optionally a MIME
    ``type``.  HLS entries are expanded into their variant formats, every
    other entry is treated as a direct media URL.
    """

    _VALID_URL = r'https?://24tv\.ua/news/showPlayer\.do.*?(?:\?|&)objectId=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
    IE_NAME = '24tv.ua'
    _TESTS = [{
        'url': 'https://24tv.ua/news/showPlayer.do?objectId=2074790&videoUrl=2022/07/2074790&w=640&h=360',
        'info_dict': {
            'id': '2074790',
            'ext': 'mp4',
            'title': 'У Харкові ворожа ракета прилетіла в будинок, де слухали пісні про "офіцерів-росіян"',
            'thumbnail': r're:^https?://.*\.jpe?g',
        }
    }, {
        'url': 'https://24tv.ua/news/showPlayer.do?videoUrl=2022/07/2074790&objectId=2074790&w=640&h=360',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [
        {
            # iframe embed created from share menu.
            'url': 'data:text/html,%3Ciframe%20src=%22https://24tv.ua/news/showPlayer.do?objectId=1886193&videoUrl'
                   '=2022/03/1886193&w=640&h=360%22%20width=%22640%22%20height=%22360%22%20frameborder=%220%22'
                   '%20scrolling=%22no%22%3E%3C/iframe%3E',
            'info_dict': {
                'id': '1886193',
                'ext': 'mp4',
                'title': 'Росіяни руйнують Бородянку на Київщині та стріляють з літаків по мешканцях: шокуючі фото',
                'thumbnail': r're:^https?://.*\.jpe?g',
            }
        },
        {
            'url': 'https://24tv.ua/vipalyuyut-nashi-mista-sela-dsns-pokazali-motoroshni-naslidki_n1883966',
            'info_dict': {
                'id': '1883966',
                'ext': 'mp4',
                'title': 'Випалюють наші міста та села, – моторошні наслідки обстрілів на Чернігівщині',
                'thumbnail': r're:^https?://.*\.jpe?g',
            },
            'params': {'allowed_extractors': ['Generic', '24tv.ua']},
        }
    ]

    def _real_extract(self, url):
        """Download the player page and build the info dict for the video.

        Returns a dict with ``id``, ``formats``, ``subtitles``,
        ``thumbnail``, ``title`` and ``description``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        formats = []
        subtitles = {}
        # A page may contain several `vPlayConfig.sources = [...]` assignments;
        # collect formats from all of them.
        for j in re.findall(r'vPlayConfig\.sources\s*=\s*(?P<json>\[{\s*(?s:.+?)\s*}])', webpage):
            sources = self._parse_json(
                j, video_id, fatal=False, ignore_extra=True,
                transform_source=js_to_json, errnote='') or []
            for source in sources:
                # Skip malformed entries instead of raising KeyError/TypeError
                # and losing every other usable source.
                src = traverse_obj(source, 'src')
                if not src:
                    continue
                if mimetype2ext(traverse_obj(source, 'type')) == 'm3u8':
                    # Non-fatal: one unreachable manifest must not discard
                    # the formats gathered from the remaining sources.
                    f, s = self._extract_m3u8_formats_and_subtitles(
                        src, video_id, fatal=False)
                    formats.extend(f)
                    self._merge_subtitles(subtitles, s)
                else:
                    formats.append({
                        'url': src,
                        'ext': determine_ext(src),
                    })

        # The poster image is read from the `vPlayConfig` object literal;
        # the OpenGraph thumbnail is used below when it is absent.
        thumbnail = traverse_obj(
            self._search_json(
                r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail',
                video_id, default=None, transform_source=js_to_json), 'poster')
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail or self._og_search_thumbnail(webpage),
            'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
        }