[yt-dlp.git] / youtube_dl / extractor / dhm.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor

import urllib2
import xml.etree.ElementTree as ET
import re


class DHMIE(InfoExtractor):
    """Information extractor for film pages under dhm.de/filmarchiv/.

    The page source contains a ``dc:title="..."`` attribute holding the
    title and a ``file: '...'`` entry holding the URL of an XML playlist.
    The playlist is downloaded and the media URL and thumbnail are read
    from its first nested entry.
    """

    # ``id`` captures the last non-empty path segment of the page URL (the
    # film slug).  It is only used to label downloads; the final video id is
    # taken from the media file name when that can be determined.
    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/?#]+/)*(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
        'md5': '11c475f670209bf6acca0b2b7ef51827',
        'info_dict': {
            'id': 'marshallwg',
            'ext': 'flv',
            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
            'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url`` and
        ``thumbnail``.  ``thumbnail`` is None when the playlist entry has
        no third child element.
        """
        # Label every request with the URL slug instead of an empty string
        # so that progress and error messages identify the page.
        url_match = re.match(self._VALID_URL, url)
        page_id = url_match.group('id') if url_match else ''

        webpage = self._download_webpage(url, page_id)

        title = self._html_search_regex(
            r'dc:title="(.*?)"', webpage, 'title')

        playlist_url = self._html_search_regex(
            r'file:\s*\'([^\']+)\'', webpage, 'playlist URL')

        # Fetch and parse the playlist through the extractor's own helper
        # rather than a raw urllib2 call: it goes through the downloader's
        # request handling and reports failures like every other download.
        playlist = self._download_xml(playlist_url, page_id)

        # NOTE(review): presumably an XSPF document (trackList -> track) with
        # the media location as the track's first child and the image as its
        # third - confirm against a live playlist.
        track = playlist[0][0]
        video_url = track[0].text
        # The thumbnail is optional; do not fail when the element is absent.
        thumbnail = track[2].text if len(track) > 2 else None

        # Prefer the media file's base name as the id; the dot before "flv"
        # is escaped so that it only matches a literal extension separator.
        file_match = re.search(r'video/(.+?)\.flv', video_url)
        video_id = file_match.group(1) if file_match else page_id

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
643fe727 OJ	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5
	6	import urllib2
	7	import xml.etree.ElementTree as ET
	8	import re
	9
	10
	11	class DHMIE(InfoExtractor):
	12	_VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P<id>.*?)'
	13
	14	_TEST = {
	15	'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
	16	'md5': '11c475f670209bf6acca0b2b7ef51827',
	17	'info_dict': {
	18	'id': 'marshallwg',
	19	'ext': 'flv',
	20	'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
	21	'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg',
	22	}
	23	}
	24
	25	def _real_extract(self, url):
	26	video_id = ''
	27	webpage = self._download_webpage(url, video_id)
	28
	29	title = self._html_search_regex(
	30	r'dc:title=\"(.*?)\"', webpage, 'title')
	31
	32	playlist_url = self._html_search_regex(
	33	r'file: \'(.*?)\'', webpage, 'playlist URL')
	34
	35	xml_file = urllib2.urlopen(playlist_url)
	36	data = xml_file.read()
	37	xml_file.close()
	38
	39	root = ET.fromstring(data)
	40	video_url = root[0][0][0].text
	41	thumbnail = root[0][0][2].text
	42
	43	m = re.search('video/(.+?).flv', video_url)
	44	if m:
	45	video_id = m.group(1)
	46
	47	return {
	48	'id': video_id,
	49	'title': title,
	50	'url': video_url,
	51	'thumbnail': thumbnail,
	52	}