jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3
	4	from .common import InfoExtractor
	5	from ..utils import (
	6	ExtractorError,
	7	unified_strdate
	8	)
	9
	10
	11	class JoveIE(InfoExtractor):
	12	_VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
	13	_CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
	14	_TESTS = [
	15	{
	16	'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
	17	'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
	18	'info_dict': {
	19	'id': '2744',
	20	'ext': 'mp4',
	21	'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
	22	'description': 'md5:015dd4509649c0908bc27f049e0262c6',
	23	'thumbnail': r're:^https?://.*\.png$',
	24	'upload_date': '20110523',
	25	}
	26	},
	27	{
	28	'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
	29	'md5': '914aeb356f416811d911996434811beb',
	30	'info_dict': {
	31	'id': '51796',
	32	'ext': 'mp4',
	33	'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
	34	'description': 'md5:35ff029261900583970c4023b70f1dc9',
	35	'thumbnail': r're:^https?://.*\.png$',
	36	'upload_date': '20140802',
	37	}
	38	},
	39
	40	]
	41
	42	def _real_extract(self, url):
	43	mobj = self._match_valid_url(url)
	44	video_id = mobj.group('id')
	45
	46	webpage = self._download_webpage(url, video_id)
	47
	48	chapters_id = self._html_search_regex(
	49	r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
	50
	51	chapters_xml = self._download_xml(
	52	self._CHAPTERS_URL.format(video_id=chapters_id),
	53	video_id, note='Downloading chapters XML',
	54	errnote='Failed to download chapters XML')
	55
	56	video_url = chapters_xml.attrib.get('video')
	57	if not video_url:
	58	raise ExtractorError('Failed to get the video URL')
	59
	60	title = self._html_search_meta('citation_title', webpage, 'title')
	61	thumbnail = self._og_search_thumbnail(webpage)
	62	description = self._html_search_regex(
	63	r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
	64	webpage, 'description', fatal=False)
	65	publish_date = unified_strdate(self._html_search_meta(
	66	'citation_publication_date', webpage, 'publish date', fatal=False))
	67	comment_count = int(self._html_search_regex(
	68	r'<meta name="num_comments" content="(\d+) Comments?"',
	69	webpage, 'comment count', fatal=False))
	70
	71	return {
	72	'id': video_id,
	73	'title': title,
	74	'url': video_url,
	75	'thumbnail': thumbnail,
	76	'description': description,
	77	'upload_date': publish_date,
	78	'comment_count': comment_count,
	79	}