]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/jove.py
1 from __future__
import unicode_literals
4 from .common
import InfoExtractor
11 class JoveIE(InfoExtractor
):
12 _VALID_URL
= r
'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
13 _CHAPTERS_URL
= 'http://www.jove.com/video-chapters?videoid={video_id:}'
16 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
17 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
21 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
22 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
23 'thumbnail': r
're:^https?://.*\.png$',
24 'upload_date': '20110523',
28 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
29 'md5': '914aeb356f416811d911996434811beb',
33 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
34 'description': 'md5:35ff029261900583970c4023b70f1dc9',
35 'thumbnail': r
're:^https?://.*\.png$',
36 'upload_date': '20140802',
42 def _real_extract(self
, url
):
43 mobj
= self
._match
_valid
_url
(url
)
44 video_id
= mobj
.group('id')
46 webpage
= self
._download
_webpage
(url
, video_id
)
48 chapters_id
= self
._html
_search
_regex
(
49 r
'/video-chapters\?videoid=([0-9]+)', webpage
, 'chapters id')
51 chapters_xml
= self
._download
_xml
(
52 self
._CHAPTERS
_URL
.format(video_id
=chapters_id
),
53 video_id
, note
='Downloading chapters XML',
54 errnote
='Failed to download chapters XML')
56 video_url
= chapters_xml
.attrib
.get('video')
58 raise ExtractorError('Failed to get the video URL')
60 title
= self
._html
_search
_meta
('citation_title', webpage
, 'title')
61 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
62 description
= self
._html
_search
_regex
(
63 r
'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
64 webpage
, 'description', fatal
=False)
65 publish_date
= unified_strdate(self
._html
_search
_meta
(
66 'citation_publication_date', webpage
, 'publish date', fatal
=False))
67 comment_count
= int(self
._html
_search
_regex
(
68 r
'<meta name="num_comments" content="(\d+) Comments?"',
69 webpage
, 'comment count', fatal
=False))
75 'thumbnail': thumbnail
,
76 'description': description
,
77 'upload_date': publish_date
,
78 'comment_count': comment_count
,