jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	from .common import InfoExtractor
	2	from ..utils import (
	3	int_or_none,
	4	orderedSet,
	5	parse_duration,
	6	parse_qs,
	7	qualities,
	8	unified_strdate,
	9	xpath_text
	10	)
	11
	12
	13	class EuropaIE(InfoExtractor):
	14	_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player\|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
	15	_TESTS = [{
	16	'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
	17	'md5': '574f080699ddd1e19a675b0ddf010371',
	18	'info_dict': {
	19	'id': 'I107758',
	20	'ext': 'mp4',
	21	'title': 'TRADE - Wikileaks on TTIP',
	22	'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
	23	'thumbnail': r're:^https?://.*\.jpg$',
	24	'upload_date': '20150811',
	25	'duration': 34,
	26	'view_count': int,
	27	'formats': 'mincount:3',
	28	}
	29	}, {
	30	'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
	31	'only_matching': True,
	32	}, {
	33	'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
	34	'only_matching': True,
	35	}]
	36
	37	def _real_extract(self, url):
	38	video_id = self._match_id(url)
	39
	40	playlist = self._download_xml(
	41	'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
	42
	43	def get_item(type_, preference):
	44	items = {}
	45	for item in playlist.findall('./info/%s/item' % type_):
	46	lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
	47	if lang and label:
	48	items[lang] = label.strip()
	49	for p in preference:
	50	if items.get(p):
	51	return items[p]
	52
	53	query = parse_qs(url)
	54	preferred_lang = query.get('sitelang', ('en', ))[0]
	55
	56	preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
	57
	58	title = get_item('title', preferred_langs) or video_id
	59	description = get_item('description', preferred_langs)
	60	thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
	61	upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
	62	duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
	63	view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
	64
	65	language_preference = qualities(preferred_langs[::-1])
	66
	67	formats = []
	68	for file_ in playlist.findall('./files/file'):
	69	video_url = xpath_text(file_, './url')
	70	if not video_url:
	71	continue
	72	lang = xpath_text(file_, './lg')
	73	formats.append({
	74	'url': video_url,
	75	'format_id': lang,
	76	'format_note': xpath_text(file_, './lglabel'),
	77	'language_preference': language_preference(lang)
	78	})
	79	self._sort_formats(formats)
	80
	81	return {
	82	'id': video_id,
	83	'title': title,
	84	'description': description,
	85	'thumbnail': thumbnail,
	86	'upload_date': upload_date,
	87	'duration': duration,
	88	'view_count': view_count,
	89	'formats': formats
	90	}