]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/jove.py
1 from .common
import InfoExtractor
8 class JoveIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
10 _CHAPTERS_URL
= 'http://www.jove.com/video-chapters?videoid={video_id:}'
13 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
14 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
18 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
19 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
20 'thumbnail': r
're:^https?://.*\.png$',
21 'upload_date': '20110523',
25 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
26 'md5': '914aeb356f416811d911996434811beb',
30 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
31 'description': 'md5:35ff029261900583970c4023b70f1dc9',
32 'thumbnail': r
're:^https?://.*\.png$',
33 'upload_date': '20140802',
39 def _real_extract(self
, url
):
40 mobj
= self
._match
_valid
_url
(url
)
41 video_id
= mobj
.group('id')
43 webpage
= self
._download
_webpage
(url
, video_id
)
45 chapters_id
= self
._html
_search
_regex
(
46 r
'/video-chapters\?videoid=([0-9]+)', webpage
, 'chapters id')
48 chapters_xml
= self
._download
_xml
(
49 self
._CHAPTERS
_URL
.format(video_id
=chapters_id
),
50 video_id
, note
='Downloading chapters XML',
51 errnote
='Failed to download chapters XML')
53 video_url
= chapters_xml
.attrib
.get('video')
55 raise ExtractorError('Failed to get the video URL')
57 title
= self
._html
_search
_meta
('citation_title', webpage
, 'title')
58 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
59 description
= self
._html
_search
_regex
(
60 r
'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
61 webpage
, 'description', fatal
=False)
62 publish_date
= unified_strdate(self
._html
_search
_meta
(
63 'citation_publication_date', webpage
, 'publish date', fatal
=False))
64 comment_count
= int(self
._html
_search
_regex
(
65 r
'<meta name="num_comments" content="(\d+) Comments?"',
66 webpage
, 'comment count', fatal
=False))
72 'thumbnail': thumbnail
,
73 'description': description
,
74 'upload_date': publish_date
,
75 'comment_count': comment_count
,