[yt-dlp.git] / yt_dlp / extractor / telegram.py

from .common import InfoExtractor


class TelegramEmbedIE(InfoExtractor):
    """Extract the video attached to a Telegram post (``t.me/<channel>/<post id>``)."""

    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'Europa Press',
            # Digest of the expected description; the ``md5:`` prefix is what
            # makes the test helper compare hashes instead of the raw string.
            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the post at *url*.

        Two pages are fetched: the regular post page (source of the
        ``og:``/``twitter:`` meta tags) and its ``embed=1`` variant (source of
        the ``<video>`` tag and the thumbnail style attribute).

        Raises an extraction error when the title or the video source cannot
        be found; a missing description or thumbnail is tolerated.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Let the downloader merge the parameter into the URL: _VALID_URL is
        # not anchored at the end, so `url` may already carry a query string
        # or fragment and naive '?embed=1' concatenation would corrupt it.
        webpage_embed = self._download_webpage(url, video_id, query={'embed': '1'})

        video_url = self._search_regex(
            r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')
        formats = [{
            'url': self._proto_relative_url(video_url),
            'ext': 'mp4',
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            # The title is mandatory metadata, so failing to find it stays fatal.
            'title': self._html_search_meta(
                ['og:title', 'twitter:title'], webpage, fatal=True),
            # Description and thumbnail are optional: a post without a caption
            # or preview image must not abort an otherwise successful extraction.
            'description': self._html_search_meta(
                ['og:description', 'twitter:description'], webpage, default=None),
            'thumbnail': self._search_regex(
                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                webpage_embed, 'thumbnail', fatal=False),
            'formats': formats,
        }
Commit	Line	Data
5bcccbfe HTL	1	from .common import InfoExtractor
	2
	3
	4	class TelegramEmbedIE(InfoExtractor):
	5	IE_NAME = 'telegram:embed'
	6	_VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
	7	_TESTS = [{
	8	'url': 'https://t.me/europa_press/613',
	9	'info_dict': {
	10	'id': '613',
	11	'ext': 'mp4',
	12	'title': 'Europa Press',
	13	'description': '6ce2d7e8d56eda16d80607b23db7b252',
	14	'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
	15	},
	16	}]
	17
	18	def _real_extract(self, url):
	19	video_id = self._match_id(url)
	20	webpage = self._download_webpage(url, video_id)
	21	webpage_embed = self._download_webpage(f'{url}?embed=1', video_id)
	22
	23	formats = [{
	24	'url': self._proto_relative_url(self._search_regex(
	25	'<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
	26	'ext': 'mp4',
	27	}]
	28	self._sort_formats(formats)
	29
	30	return {
	31	'id': video_id,
	32	'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
	33	'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True),
	34	'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
	35	webpage_embed, 'thumbnail'),
	36	'formats': formats,
	37	}