[yt-dlp.git] / youtube_dl / extractor / dumpert.py

# coding: utf-8
from __future__ import unicode_literals

import base64

from .common import InfoExtractor


class DumpertIE(InfoExtractor):
    """Information extractor for dumpert.nl "mediabase" video pages.

    The page embeds its media locations in a ``data-files`` HTML attribute
    whose value is base64-encoded JSON, used here as a mapping from format
    name to URL.
    """

    # The captured ``id`` group spans both path components after
    # ``/mediabase/`` (the numeric part and the alphanumeric part).
    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*'

    _TEST = {
        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
        'info_dict': {
            'id': '6646981/951bc60f',
            'ext': 'mp4',
            'title': 'Ik heb nieuws voor je',
            'description': 'Niet schrikken hoor',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single dumpert.nl video page.

        Downloads the page at *url*, reads the ``title`` and ``description``
        meta tags, and decodes the ``data-files`` attribute into the list of
        available formats.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``formats``; each format entry carries ``format_id`` and ``url``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage)
        description = self._html_search_meta('description', webpage)

        # The attribute value is base64 text wrapping a JSON document.
        encoded = self._html_search_regex(
            r'data-files="(.*?)"', webpage, 'files')
        raw = base64.b64decode(encoded)
        decoded = raw.decode('iso-8859-1')
        media_urls = self._parse_json(decoded, video_id)

        # Only these format names are picked up, in this fixed order
        # (presumably lowest to highest quality -- not verifiable from here).
        formats = []
        for format_id in ('flv', 'mobile', 'tablet', '720p'):
            if format_id not in media_urls:
                continue
            formats.append({
                'format_id': format_id,
                # Turn any remaining JSON-style escaped slash into a plain one.
                'url': media_urls[format_id].replace(r'\/', '/'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
Commit	Line	Data
4d5d14f5 JS	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import base64
	5
	6	from .common import InfoExtractor
	7
	8
	9	class DumpertIE(InfoExtractor):
	10	_VALID_URL = (r'https?://(?:www\.)?dumpert\.nl/mediabase/'
	11	r'(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*')
	12	_TEST = {
	13	'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
	14	'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
	15	'info_dict': {
	16	'id': '6646981/951bc60f',
	17	'ext': 'mp4',
	18	'title': 'Ik heb nieuws voor je',
	19	'description': 'Niet schrikken hoor'
	20	}
	21	}
	22
	23	def _real_extract(self, url):
	24	video_id = self._match_id(url)
	25	webpage = self._download_webpage(url, video_id)
	26
	27	title = self._html_search_meta('title', webpage)
	28	description = self._html_search_meta('description', webpage)
	29
	30	files_base64 = self._html_search_regex(r'data-files="(.*?)"',
	31	webpage,
	32	'files')
	33	files_json = base64.b64decode(files_base64).decode('iso-8859-1')
	34	files = self._parse_json(files_json, video_id)
	35
	36	format_names = ['flv', 'mobile', 'tablet', '720p']
	37	formats = [{'format_id': name,
	38	'url': files[name].replace(r'\/', '/')}
	39	for name in format_names
	40	if name in files]
	41
	42	return {
	43	'id': video_id,
	44	'title': title,
	45	'description': description,
	46	'formats': formats
	47	}