]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/slideshare.py
4 from . common
import InfoExtractor
11 class SlideshareIE ( InfoExtractor
):
12 _VALID_URL
= r
'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
15 'url' : 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,
19 'title' : 'Managing Scale and Complexity' ,
20 'description' : 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.' ,
24 def _real_extract ( self
, url
):
25 mobj
= self
._ match
_ valid
_u rl
( url
)
26 page_title
= mobj
. group ( 'title' )
27 webpage
= self
._ download
_ webpage
( url
, page_title
)
28 slideshare_obj
= self
._ search
_ regex
(
29 r
'\$\.extend\(.*?slideshare_object,\s*(\ {.*?\} )\);' ,
30 webpage
, 'slideshare object' )
31 info
= json
. loads ( slideshare_obj
)
32 if info
[ 'slideshow' ][ 'type' ] != 'video' :
33 raise ExtractorError ( 'Webpage type is "{}": only video extraction is supported for Slideshare' . format ( info
[ 'slideshow' ][ 'type' ]), expected
= True )
36 bucket
= info
[ 'jsplayer' ][ 'video_bucket' ]
37 ext
= info
[ 'jsplayer' ][ 'video_extension' ]
38 video_url
= urllib
. parse
. urljoin ( bucket
, doc
+ '-SD.' + ext
)
39 description
= get_element_by_id ( 'slideshow-description-paragraph' , webpage
) or self
._ html
_ search
_ regex
(
40 r
'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>' , webpage
,
41 'description' , fatal
= False )
45 'id' : info
[ 'slideshow' ][ 'id' ],
46 'title' : info
[ 'slideshow' ][ 'title' ],
49 'thumbnail' : info
[ 'slideshow' ][ 'pin_image_url' ],
50 'description' : description
. strip () if description
else None ,