[yt-dlp.git] / youtube_dl / extractor / dreisat.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import unified_strdate


def _xml_text(parent, path):
    """Return the text of the first element matching *path* below *parent*.

    Returns None when *parent* is None or when no element matches.
    """
    if parent is None:
        return None
    node = parent.find(path)
    if node is None:
        return None
    return node.text


def _parse_int(value):
    """Convert *value* to int; return None if it is missing or malformed."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


class DreiSatIE(InfoExtractor):
    """Information extractor for single videos of the 3sat Mediathek.

    The video id is taken from the ``obj`` query parameter of the URL.
    Metadata, thumbnails and formats are read from the ``beitragsDetails``
    XML document that is downloaded for that id.
    """
    IE_NAME = '3sat'
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
        'md5': '9dcfe344732808dbfcc901537973c922',
        'info_dict': {
            'id': '36983',
            'ext': 'mp4',
            'title': 'Kaffeeland Schweiz',
            'description': 'md5:cc4424b18b75ae9948b13929a0814033',
            'uploader': '3sat',
            'upload_date': '20130622'
        }
    }

    def _real_extract(self, url):
        """Download the details XML for *url* and build the info dict.

        Optional metadata (description, uploader, upload date, thumbnail
        sizes, per-format dimensions/size/bitrate) that is absent or
        malformed in the XML is left out instead of aborting the extraction.
        The title stays mandatory.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_doc = self._download_xml(details_url, video_id, 'Downloading video details')

        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            if not te.text:
                # A teaser image without a URL is of no use.
                continue
            thumbnail = {'url': te.text}
            # The 'key' attribute is parsed as WIDTHxHEIGHT.
            width, _, height = te.attrib.get('key', '').partition('x')
            for dimension, raw in (('width', width), ('height', height)):
                value = _parse_int(raw)
                if value is not None:
                    thumbnail[dimension] = value
            thumbnails.append(thumbnail)

        information_el = details_doc.find('.//information')
        # The title is mandatory: a response without it still fails here,
        # as before, rather than yielding an entry without a title.
        video_title = information_el.find('./title').text
        video_description = _xml_text(information_el, './detail')

        details_el = details_doc.find('.//details')
        video_uploader = _xml_text(details_el, './channel')
        airtime = _xml_text(details_el, './airtime')
        upload_date = unified_strdate(airtime) if airtime else None

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            format_url = _xml_text(fe, './url')
            # Skip entries without a URL, and entries served through
            # metafilegenerator.de (presumably metafiles rather than media
            # files -- not verifiable from this file).
            if not format_url or format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            fmt = {'url': format_url}
            format_id = fe.attrib.get('basetype')
            if format_id is not None:
                fmt['format_id'] = format_id
            numeric_fields = (
                ('width', './width'),
                ('height', './height'),
                ('filesize', './filesize'),
                ('video_bitrate', './videoBitrate'),
            )
            for key, path in numeric_fields:
                value = _parse_int(_xml_text(fe, path))
                if value is not None:
                    fmt[key] = value
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # The last listed teaser image is used as the default thumbnail;
            # guard against documents that list none at all.
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }
Commit	Line	Data
e0b4cc48	1	from __future__ import unicode_literals
73e79f2a PH	2
73e79f2a PH	3	import re
73e79f2a PH	4
73e79f2a PH	5	from .common import InfoExtractor
e0b4cc48	6	from ..utils import unified_strdate
73e79f2a PH	7
	8
	9	class DreiSatIE(InfoExtractor):
	10	IE_NAME = '3sat'
71cd2a57	11	_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
73e79f2a	12	_TEST = {
e0b4cc48 S	13	'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
	14	'md5': '9dcfe344732808dbfcc901537973c922',
	15	'info_dict': {
	16	'id': '36983',
	17	'ext': 'mp4',
	18	'title': 'Kaffeeland Schweiz',
	19	'description': 'md5:cc4424b18b75ae9948b13929a0814033',
	20	'uploader': '3sat',
	21	'upload_date': '20130622'
73e79f2a PH	22	}
	23	}
	24
73e79f2a PH	25	def _real_extract(self, url):
	26	mobj = re.match(self._VALID_URL, url)
	27	video_id = mobj.group('id')
	28	details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
e0b4cc48	29	details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
73e79f2a PH	30
	31	thumbnail_els = details_doc.findall('.//teaserimage')
	32	thumbnails = [{
15e42340 S	33	'width': int(te.attrib['key'].partition('x')[0]),
15e42340 S	34	'height': int(te.attrib['key'].partition('x')[2]),
73e79f2a PH	35	'url': te.text,
	36	} for te in thumbnail_els]
	37
	38	information_el = details_doc.find('.//information')
	39	video_title = information_el.find('./title').text
	40	video_description = information_el.find('./detail').text
	41
	42	details_el = details_doc.find('.//details')
	43	video_uploader = details_el.find('./channel').text
	44	upload_date = unified_strdate(details_el.find('./airtime').text)
	45
	46	format_els = details_doc.findall('.//formitaet')
	47	formats = [{
	48	'format_id': fe.attrib['basetype'],
	49	'width': int(fe.find('./width').text),
	50	'height': int(fe.find('./height').text),
	51	'url': fe.find('./url').text,
	52	'filesize': int(fe.find('./filesize').text),
	53	'video_bitrate': int(fe.find('./videoBitrate').text),
73e79f2a PH	54	} for fe in format_els
	55	if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
	56
55e663a8	57	self._sort_formats(formats)
73e79f2a	58
fb7abb31	59	return {
690e872c	60	'_type': 'video',
73e79f2a PH	61	'id': video_id,
	62	'title': video_title,
	63	'formats': formats,
	64	'description': video_description,
	65	'thumbnails': thumbnails,
	66	'thumbnail': thumbnails[-1]['url'],
	67	'uploader': video_uploader,
	68	'upload_date': upload_date,
	69	}