jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    ExtractorError,
    fix_xml_ampersands,
    float_or_none,
    xpath_with_ns,
    xpath_text,
)
	12
	13
	14	class KarriereVideosIE(InfoExtractor):
	15	_VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
	16	_TESTS = [{
	17	'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
	18	'info_dict': {
	19	'id': '32c91',
	20	'ext': 'flv',
	21	'title': 'AltenpflegerIn',
	22	'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
	23	'thumbnail': r're:^http://.*\.png',
	24	},
	25	'params': {
	26	# rtmp download
	27	'skip_download': True,
	28	}
	29	}, {
	30	# broken ampersands
	31	'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
	32	'info_dict': {
	33	'id': '5sniu',
	34	'ext': 'flv',
	35	'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
	36	'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
	37	'thumbnail': r're:^http://.*\.png',
	38	},
	39	'params': {
	40	# rtmp download
	41	'skip_download': True,
	42	}
	43	}]
	44
	45	def _real_extract(self, url):
	46	video_id = self._match_id(url)
	47
	48	webpage = self._download_webpage(url, video_id)
	49
	50	title = (self._html_search_meta('title', webpage, default=None)
	51	or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
	52
	53	video_id = self._search_regex(
	54	r'/config/video/(.+?)\.xml', webpage, 'video id')
	55	# Server returns malformed headers
	56	# Force Accept-Encoding: * to prevent gzipped results
	57	playlist = self._download_xml(
	58	'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
	59	video_id, transform_source=fix_xml_ampersands,
	60	headers={'Accept-Encoding': '*'})
	61
	62	NS_MAP = {
	63	'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
	64	}
	65
	66	def ns(path):
	67	return xpath_with_ns(path, NS_MAP)
	68
	69	item = playlist.find('./tracklist/item')
	70	video_file = xpath_text(
	71	item, ns('./jwplayer:file'), 'video url', fatal=True)
	72	streamer = xpath_text(
	73	item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
	74
	75	uploader = xpath_text(
	76	item, ns('./jwplayer:author'), 'uploader')
	77	duration = float_or_none(
	78	xpath_text(item, ns('./jwplayer:duration'), 'duration'))
	79
	80	description = self._html_search_regex(
	81	r'(?s)<div class="leadtext">(.+?)</div>',
	82	webpage, 'description')
	83
	84	thumbnail = self._html_search_meta(
	85	'thumbnail', webpage, 'thumbnail')
	86	if thumbnail:
	87	thumbnail = compat_urlparse.urljoin(url, thumbnail)
	88
	89	return {
	90	'id': video_id,
	91	'url': streamer.replace('rtmpt', 'rtmp'),
	92	'play_path': 'mp4:%s' % video_file,
	93	'ext': 'flv',
	94	'title': title,
	95	'description': description,
	96	'thumbnail': thumbnail,
	97	'uploader': uploader,
	98	'duration': duration,
	99	}