[yt-dlp.git] / yt_dlp / extractor / jove.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    unified_strdate
)


class JoveIE(InfoExtractor):
    # Matches video pages such as http://www.jove.com/video/<numeric id>/<slug>;
    # only the numeric id is captured.
    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
    # Template for the chapters XML document; its root element carries the
    # media URL in the "video" attribute (see _real_extract).
    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
    _TESTS = [
        {
            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
            'info_dict': {
                'id': '2744',
                'ext': 'mp4',
                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
                'thumbnail': r're:^https?://.*\.png$',
                'upload_date': '20110523',
            },
        },
        {
            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
            'md5': '914aeb356f416811d911996434811beb',
            'info_dict': {
                'id': '51796',
                'ext': 'mp4',
                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
                'description': 'md5:35ff029261900583970c4023b70f1dc9',
                'thumbnail': r're:^https?://.*\.png$',
                'upload_date': '20140802',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single JoVE video page.

        Downloads the web page, locates the chapters id referenced in it,
        fetches the chapters XML and reads the media URL from the ``video``
        attribute of its root element. Title, thumbnail, description,
        publication date and comment count are scraped from the web page.

        Raises ExtractorError when the chapters XML carries no video URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The id used by the chapters endpoint is taken from the page itself
        # rather than assumed to equal the id in the URL.
        chapters_id = self._html_search_regex(
            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')

        chapters_xml = self._download_xml(
            self._CHAPTERS_URL.format(video_id=chapters_id),
            video_id, note='Downloading chapters XML',
            errnote='Failed to download chapters XML')

        video_url = chapters_xml.attrib.get('video')
        if not video_url:
            raise ExtractorError('Failed to get the video URL')

        title = self._html_search_meta('citation_title', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._html_search_regex(
            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
            webpage, 'description', fatal=False)
        publish_date = unified_strdate(self._html_search_meta(
            'citation_publication_date', webpage, 'publish date', fatal=False))

        # The comment count is optional: the search is non-fatal and yields
        # None when the meta tag is absent, so only convert an actual match.
        # Calling int() on None would raise TypeError and abort extraction.
        comment_count_text = self._html_search_regex(
            r'<meta name="num_comments" content="(\d+) Comments?"',
            webpage, 'comment count', fatal=False)
        comment_count = int(comment_count_text) if comment_count_text else None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': publish_date,
            'comment_count': comment_count,
        }
Commit	Line	Data
a229909f	1	from .common import InfoExtractor
fe556f1b S	2	from ..utils import (
	3	ExtractorError,
	4	unified_strdate
	5	)
a229909f NJ	6
	7
	8	class JoveIE(InfoExtractor):
	9	_VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
	10	_CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
fe556f1b S	11	_TESTS = [
	12	{
	13	'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
	14	'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
	15	'info_dict': {
	16	'id': '2744',
	17	'ext': 'mp4',
	18	'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
	19	'description': 'md5:015dd4509649c0908bc27f049e0262c6',
ec85ded8	20	'thumbnail': r're:^https?://.*\.png$',
fe556f1b S	21	'upload_date': '20110523',
	22	}
	23	},
	24	{
	25	'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
	26	'md5': '914aeb356f416811d911996434811beb',
	27	'info_dict': {
	28	'id': '51796',
	29	'ext': 'mp4',
	30	'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
	31	'description': 'md5:35ff029261900583970c4023b70f1dc9',
ec85ded8	32	'thumbnail': r're:^https?://.*\.png$',
fe556f1b S	33	'upload_date': '20140802',
	34	}
	35	},
	36
	37	]
a229909f NJ	38
a229909f NJ	39	def _real_extract(self, url):
5ad28e7f	40	mobj = self._match_valid_url(url)
a229909f NJ	41	video_id = mobj.group('id')
	42
	43	webpage = self._download_webpage(url, video_id)
a229909f	44
a229909f NJ	45	chapters_id = self._html_search_regex(
a229909f NJ	46	r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
fe556f1b	47
a229909f NJ	48	chapters_xml = self._download_xml(
a229909f NJ	49	self._CHAPTERS_URL.format(video_id=chapters_id),
fe556f1b S	50	video_id, note='Downloading chapters XML',
	51	errnote='Failed to download chapters XML')
	52
a229909f NJ	53	video_url = chapters_xml.attrib.get('video')
	54	if not video_url:
	55	raise ExtractorError('Failed to get the video URL')
	56
fe556f1b S	57	title = self._html_search_meta('citation_title', webpage, 'title')
	58	thumbnail = self._og_search_thumbnail(webpage)
	59	description = self._html_search_regex(
	60	r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
	61	webpage, 'description', fatal=False)
	62	publish_date = unified_strdate(self._html_search_meta(
	63	'citation_publication_date', webpage, 'publish date', fatal=False))
34646967	64	comment_count = int(self._html_search_regex(
fe556f1b	65	r'<meta name="num_comments" content="(\d+) Comments?"',
34646967	66	webpage, 'comment count', fatal=False))
a229909f NJ	67
	68	return {
	69	'id': video_id,
	70	'title': title,
	71	'url': video_url,
a229909f NJ	72	'thumbnail': thumbnail,
	73	'description': description,
	74	'upload_date': publish_date,
fe556f1b	75	'comment_count': comment_count,
a229909f	76	}