[yt-dlp.git] / youtube_dl / extractor / ssa.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    unescapeHTML,
    parse_duration,
)


class SSAIE(InfoExtractor):
    """Extractor for film pages at http://ssa.nls.uk/film/<id>.

    The film page embeds its player configuration as quoted
    ``'name', 'value'`` pairs ('streamer', 'file', 'image') and its
    metadata as ``field_title`` / ``field_content`` span pairs; both are
    scraped with regular expressions.
    """

    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
    _TEST = {
        'url': 'http://ssa.nls.uk/film/3561',
        'info_dict': {
            'id': '3561',
            'ext': 'flv',
            'title': 'SHETLAND WOOL',
            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
            'duration': 900,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Scrape one film page and return its info dict.

        The result carries the RTMP streamer URL as 'url', the file name
        without its extension as 'play_path', and the title, description,
        duration and thumbnail read from the page.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Allow optional whitespace after the comma, like the 'file' and
        # 'image' patterns below. The trailing \S* is kept so everything
        # the previous pattern (which had only \S* here) matched still
        # matches.
        streamer = self._search_regex(
            r"'streamer'\s*,\s*\S*'(rtmp[^']+)'", webpage, 'streamer')
        # Drop the extension: everything before the last '.' is the play path.
        play_path = self._search_regex(
            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]

        def search_field(field_name, fatal=False):
            """Return the text of the metadata span labelled *field_name*.

            *fatal* is forwarded to _search_regex (presumably a missing
            field then raises instead of yielding None -- confirm against
            InfoExtractor._search_regex).
            """
            return self._search_regex(
                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                # Label the lookup with the field actually searched for, so
                # diagnostics no longer call every field 'title'.
                webpage, field_name.lower(), fatal=fatal)

        # The title is the only mandatory field; surrounding brackets and
        # parentheses are stripped from it.
        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
        # Optional fields: looked up non-fatally.
        description = unescapeHTML(search_field('Description'))
        duration = parse_duration(search_field('Running time'))
        thumbnail = self._search_regex(
            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)

        return {
            'id': video_id,
            'url': streamer,
            'play_path': play_path,
            'ext': 'flv',
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
c792b501 S	1	from __future__ import unicode_literals
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	unescapeHTML,
	6	parse_duration,
	7	)
	8
	9
	10	class SSAIE(InfoExtractor):
	11	_VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
	12	_TEST = {
	13	'url': 'http://ssa.nls.uk/film/3561',
	14	'info_dict': {
	15	'id': '3561',
	16	'ext': 'flv',
	17	'title': 'SHETLAND WOOL',
	18	'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
	19	'duration': 900,
	20	'thumbnail': 're:^https?://.*\.jpg$',
	21	},
	22	'params': {
	23	# rtmp download
	24	'skip_download': True,
	25	},
	26	}
	27
	28	def _real_extract(self, url):
	29	video_id = self._match_id(url)
	30
	31	webpage = self._download_webpage(url, video_id)
	32
	33	streamer = self._search_regex(
	34	r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
	35	play_path = self._search_regex(
	36	r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
	37
	38	def search_field(field_name, fatal=False):
	39	return self._search_regex(
	40	r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
	41	webpage, 'title', fatal=fatal)
	42
	43	title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
	44	description = unescapeHTML(search_field('Description'))
	45	duration = parse_duration(search_field('Running time'))
	46	thumbnail = self._search_regex(
	47	r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
	48
	49	return {
	50	'id': video_id,
	51	'url': streamer,
	52	'play_path': play_path,
	53	'ext': 'flv',
	54	'title': title,
	55	'description': description,
	56	'duration': duration,
	57	'thumbnail': thumbnail,
	58	}