]>
Commit | Line | Data |
---|---|---|
96b9e9cf | 1 | import re |
2 | ||
5bcccbfe | 3 | from .common import InfoExtractor |
96b9e9cf | 4 | from ..utils import ( |
5 | clean_html, | |
6 | format_field, | |
7 | get_element_by_class, | |
8 | parse_duration, | |
9 | parse_qs, | |
10 | traverse_obj, | |
11 | unified_timestamp, | |
12 | update_url_query, | |
13 | url_basename, | |
14 | ) | |
5bcccbfe HTL |
15 | |
16 | ||
class TelegramEmbedIE(InfoExtractor):
    """Extract the video(s) attached to a ``t.me/<channel_id>/<id>`` post.

    The post's embed frame is downloaded and scanned for video player
    anchors. A post carrying several videos is returned as a playlist unless
    the URL has a ``single`` query parameter or the playlist is declined, in
    which case only the entry whose id equals the message id is returned.
    """

    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_id>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'md5': 'dd707708aea958c11a590e8068825f22',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'channel_id': 'europa_press',
            'channel': 'Europa Press ✔',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1635631203,
            'upload_date': '20211030',
            'duration': 61,
        },
    }, {
        # 2-video post
        'url': 'https://t.me/vorposte/29342',
        'info_dict': {
            'id': 'vorposte-29342',
            'title': 'Форпост 29342',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # 2-video post with --no-playlist
        'url': 'https://t.me/vorposte/29343',
        'md5': '1724e96053c18e788c8464038876e245',
        'info_dict': {
            'id': '29343',
            'ext': 'mp4',
            'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'channel_id': 'vorposte',
            'channel': 'Форпост',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1666384480,
            'upload_date': '20221021',
            'duration': 35,
        },
        'params': {
            'noplaylist': True,
        }
    }, {
        # 2-video post with 'single' query param
        'url': 'https://t.me/vorposte/29342?single',
        'md5': 'd20b202f1e41400a9f43201428add18f',
        'info_dict': {
            'id': '29342',
            'ext': 'mp4',
            'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'channel_id': 'vorposte',
            'channel': 'Форпост',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1666384480,
            'upload_date': '20221021',
            'duration': 33,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict (or playlist) for the post at *url*.

        Returns a playlist result when the post has more than one video, the
        URL carries no ``single`` query parameter and ``_yes_playlist``
        agrees; otherwise returns the entry whose ``id`` equals the message
        id taken from the URL.
        """
        channel_id, msg_id = self._match_valid_url(url).group('channel_id', 'id')
        # NOTE(review): the empty list for 'single' presumably strips any
        # `single` parameter from the request so the embed frame lists every
        # video of the post - confirm against the query handling of
        # `_download_webpage`.
        embed = self._download_webpage(
            url, msg_id, query={'embed': '1', 'single': []}, note='Downloading embed frame')

        def clean_text(html_class, html):
            """Return the text of the element with *html_class* on one line, or None."""
            text = clean_html(get_element_by_class(html_class, html))
            return text.replace('\n', ' ') if text else None

        description = clean_text('tgme_widget_message_text', embed)
        # Metadata shared by every video of the post; merged into each entry below.
        message = {
            'title': description or '',
            'description': description,
            'channel': clean_text('tgme_widget_message_author', embed),
            'channel_id': channel_id,
            'timestamp': unified_timestamp(self._search_regex(
                r'<time[^>]*datetime="([^"]*)"', embed, 'timestamp', fatal=False)),
        }

        videos = []
        # Each match runs from a video player anchor up to the next closing
        # </time> tag, which is where the duration is searched for below.
        for video in re.findall(r'<a class="tgme_widget_message_video_player(?s:.+?)</time>', embed):
            video_url = self._search_regex(
                r'<video[^>]+src="([^"]+)"', video, 'video URL', fatal=False)
            webpage_url = self._search_regex(
                r'<a class="tgme_widget_message_video_player[^>]+href="([^"]+)"',
                video, 'webpage URL', fatal=False)
            # Skip players lacking either the media URL or the link to the post.
            if not video_url or not webpage_url:
                continue
            formats = [{
                'url': video_url,
                'ext': 'mp4',
            }]
            videos.append({
                # The last path component of the per-video link is the message id.
                'id': url_basename(webpage_url),
                'webpage_url': update_url_query(webpage_url, {'single': True}),
                'duration': parse_duration(self._search_regex(
                    r'<time[^>]+duration[^>]*>([\d:]+)</time>', video, 'duration', fatal=False)),
                'thumbnail': self._search_regex(
                    r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                    video, 'thumbnail', fatal=False),
                'formats': formats,
                **message,
            })

        # A playlist is only offered for multi-video posts requested without
        # the `single` query parameter.
        playlist_id = None
        if len(videos) > 1 and 'single' not in parse_qs(url, keep_blank_values=True):
            playlist_id = f'{channel_id}-{msg_id}'

        if self._yes_playlist(playlist_id, msg_id):
            return self.playlist_result(
                videos, playlist_id, format_field(message, 'channel', f'%s {msg_id}'), description)
        else:
            # NOTE(review): when no collected entry has id == msg_id (e.g. a
            # post without video) this presumably yields None - confirm
            # whether an explicit error would be preferable.
            return traverse_obj(videos, lambda _, x: x['id'] == msg_id, get_all=False)