[yt-dlp.git] / youtube_dl / extractor / zapiks.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
    xpath_with_ns,
    xpath_text,
    int_or_none,
)


class ZapiksIE(InfoExtractor):
    """Information extractor for zapiks.fr / zapiks.com video pages.

    Two URL shapes are accepted by ``_VALID_URL``:

    * ``<slug>.html`` pages, optionally preceded by a two-letter path
      segment (captured as ``display_id``);
    * ``index.php?...media_id=<digits>`` URLs (captured as ``id``).
    """

    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
    _TESTS = [
        {
            'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
            'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
            'info_dict': {
                'id': '80798',
                'ext': 'mp4',
                'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
                'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 528,
                'timestamp': 1359044972,
                'upload_date': '20130124',
                'view_count': int,
                'comment_count': int,
            },
        },
        {
            'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video found at *url*.

        The web page is used for the media id (when the URL does not carry
        it), the description, duration, upload date and the play/comment
        counters.  The playlist XML is used for the title, the thumbnail and
        the media sources; title and thumbnail fall back to the page's
        OpenGraph data.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``timestamp``, ``view_count``,
        ``comment_count`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # index.php URLs have no slug, so the numeric id doubles as display id.
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        if not video_id:
            # Slug URLs do not contain the id; read it from the page markup.
            video_id = self._search_regex(
                r'data-media-id="(\d+)"', webpage, 'video id')

        playlist = self._download_xml(
            'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
            display_id)

        NS_MAP = {
            'jwplayer': 'http://rss.jwpcdn.com/'
        }

        def ns(path):
            # Expand the 'jwplayer:' prefix so ElementTree can resolve it.
            return xpath_with_ns(path, NS_MAP)

        item = playlist.find('./channel/item')

        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = xpath_text(
            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration', default=None))
        # ' ' is handed to parse_iso8601 as the date/time delimiter.
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', default=None), ' ')

        view_count = int_or_none(self._search_regex(
            r'UserPlays:(\d+)', webpage, 'view count', default=None))
        comment_count = int_or_none(self._search_regex(
            r'UserComments:(\d+)', webpage, 'comment count', default=None))

        formats = []
        for source in item.findall(ns('./jwplayer:source')):
            # A source without a file URL is unusable; skip it rather than
            # letting a KeyError abort the extraction of the other sources.
            format_url = source.get('file')
            if not format_url:
                continue
            # The label is optional metadata and may be absent.
            format_id = source.get('label')
            f = {
                'url': format_url,
                'format_id': format_id,
            }
            # Labels that start like '720p' also provide the video height.
            m = re.search(r'^(?P<height>\d+)[pP]', format_id or '')
            if m:
                f['height'] = int(m.group('height'))
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
        }
Commit	Line	Data
4aeccadf S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	parse_duration,
	9	parse_iso8601,
	10	xpath_with_ns,
	11	xpath_text,
	12	int_or_none,
	13	)
	14
	15
	16	class ZapiksIE(InfoExtractor):
ea5152ca	17	_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr\|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html\|index\.php\?.*\bmedia_id=(?P<id>\d+))'
4aeccadf S	18	_TESTS = [
	19	{
	20	'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
	21	'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
	22	'info_dict': {
	23	'id': '80798',
	24	'ext': 'mp4',
	25	'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
	26	'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
	27	'thumbnail': 're:^https?://.*\.jpg$',
	28	'duration': 528,
	29	'timestamp': 1359044972,
	30	'upload_date': '20130124',
	31	'view_count': int,
	32	'comment_count': int,
	33	},
	34	},
ea5152ca S	35	{
	36	'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
	37	'only_matching': True,
	38	},
	39	{
	40	'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
	41	'only_matching': True,
	42	},
4aeccadf S	43	{
	44	'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
	45	'only_matching': True,
	46	},
	47	]
	48
	49	def _real_extract(self, url):
	50	mobj = re.match(self._VALID_URL, url)
	51	video_id = mobj.group('id')
	52	display_id = mobj.group('display_id') or video_id
	53
	54	webpage = self._download_webpage(url, display_id)
	55
	56	if not video_id:
	57	video_id = self._search_regex(
	58	r'data-media-id="(\d+)"', webpage, 'video id')
	59
	60	playlist = self._download_xml(
	61	'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
	62	display_id)
	63
	64	NS_MAP = {
	65	'jwplayer': 'http://rss.jwpcdn.com/'
	66	}
	67
	68	def ns(path):
	69	return xpath_with_ns(path, NS_MAP)
	70
	71	item = playlist.find('./channel/item')
	72
	73	title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
	74	description = self._og_search_description(webpage, default=None)
	75	thumbnail = xpath_text(
	76	item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
	77	duration = parse_duration(self._html_search_meta(
	78	'duration', webpage, 'duration', default=None))
	79	timestamp = parse_iso8601(self._html_search_meta(
	80	'uploadDate', webpage, 'upload date', default=None), ' ')
	81
	82	view_count = int_or_none(self._search_regex(
	83	r'UserPlays:(\d+)', webpage, 'view count', default=None))
	84	comment_count = int_or_none(self._search_regex(
	85	r'UserComments:(\d+)', webpage, 'comment count', default=None))
	86
	87	formats = []
	88	for source in item.findall(ns('./jwplayer:source')):
	89	format_id = source.attrib['label']
	90	f = {
	91	'url': source.attrib['file'],
	92	'format_id': format_id,
	93	}
	94	m = re.search(r'^(?P<height>\d+)[pP]', format_id)
	95	if m:
	96	f['height'] = int(m.group('height'))
	97	formats.append(f)
	98	self._sort_formats(formats)
	99
	100	return {
	101	'id': video_id,
	102	'title': title,
	103	'description': description,
	104	'thumbnail': thumbnail,
	105	'duration': duration,
	106	'timestamp': timestamp,
107	'view_count': view_count,
108	'comment_count': comment_count,
109	'formats': formats,
110	}