jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..utils import (
	6	determine_ext,
	7	int_or_none,
	8	parse_iso8601,
	9	)
	10
	11
	12	class HeiseIE(InfoExtractor):
	13	_VALID_URL = r'''(?x)
	14	https?://(?:www\.)?heise\.de/video/artikel/
	15	.+?(?P<id>[0-9]+)\.html(?:$\|[?#])
	16	'''
	17	_TEST = {
	18	'url': (
	19	'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
	20	),
	21	'md5': 'ffed432483e922e88545ad9f2f15d30e',
	22	'info_dict': {
	23	'id': '2404147',
	24	'ext': 'mp4',
	25	'title': (
	26	"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
	27	),
	28	'format_id': 'mp4_720p',
	29	'timestamp': 1411812600,
	30	'upload_date': '20140927',
	31	'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
	32	'thumbnail': 're:^https?://.*\.jpe?g$',
	33	}
	34	}
	35
	36	def _real_extract(self, url):
	37	video_id = self._match_id(url)
	38	webpage = self._download_webpage(url, video_id)
	39
	40	container_id = self._search_regex(
	41	r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
	42	webpage, 'container ID')
	43	sequenz_id = self._search_regex(
	44	r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
	45	webpage, 'sequenz ID')
	46	data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
	47	doc = self._download_xml(data_url, video_id)
	48
	49	info = {
	50	'id': video_id,
	51	'thumbnail': self._og_search_thumbnail(webpage),
	52	'timestamp': parse_iso8601(
	53	self._html_search_meta('date', webpage)),
	54	'description': self._og_search_description(webpage),
	55	}
	56
	57	title = self._html_search_meta('fulltitle', webpage)
	58	if title:
	59	info['title'] = title
	60	else:
	61	info['title'] = self._og_search_title(webpage)
	62
	63	formats = []
	64	for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
	65	label = source_node.attrib['label']
	66	height = int_or_none(self._search_regex(
	67	r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
	68	video_url = source_node.attrib['file']
	69	ext = determine_ext(video_url, '')
	70	formats.append({
	71	'url': video_url,
	72	'format_note': label,
	73	'format_id': '%s_%s' % (ext, label),
	74	'height': height,
	75	})
	76	self._sort_formats(formats)
	77	info['formats'] = formats
	78
	79	return info