]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/airmozilla.py
669556b98fe2b4dd1f3bd041beb27a5ec48179df
3 from . common
import InfoExtractor
11 class AirMozillaIE ( InfoExtractor
):
12 _VALID_URL
= r
'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
14 'url' : 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/' ,
15 'md5' : '8d02f53ee39cf006009180e21df1f3ba' ,
19 'title' : 'Privacy Lab - a meetup for privacy minded people in San Francisco' ,
20 'thumbnail' : r
're:https?://.*/poster\.jpg' ,
21 'description' : 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...' ,
22 'timestamp' : 1422487800 ,
23 'upload_date' : '20150128' ,
24 'location' : 'SFO Commons' ,
27 'categories' : [ 'Main' , 'Privacy' ],
31 def _real_extract ( self
, url
):
32 display_id
= self
._ match
_ id
( url
)
33 webpage
= self
._ download
_ webpage
( url
, display_id
)
34 video_id
= self
._ html
_ search
_ regex
( r
'//vid\.ly/(.*?)/embed' , webpage
, 'id' )
36 embed_script
= self
._ download
_ webpage
( 'https://vid.ly/ {0} /embed' . format ( video_id
), video_id
)
37 jwconfig
= self
._ parse
_ json
( self
._ search
_ regex
(
38 r
'initCallback\((.*)\);' , embed_script
, 'metadata' ), video_id
)[ 'config' ]
40 info_dict
= self
._ parse
_ jwplayer
_ data
( jwconfig
, video_id
)
41 view_count
= int_or_none ( self
._ html
_ search
_ regex
(
42 r
'Views since archived: ([0-9]+)' ,
43 webpage
, 'view count' , fatal
= False ))
44 timestamp
= parse_iso8601 ( self
._ html
_ search
_ regex
(
45 r
'<time datetime="(.*?)"' , webpage
, 'timestamp' , fatal
= False ))
46 duration
= parse_duration ( self
._ search
_ regex
(
47 r
'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)' ,
48 webpage
, 'duration' , fatal
= False ))
52 'title' : self
._ og
_ search
_ title
( webpage
),
53 'url' : self
._ og
_ search
_u rl
( webpage
),
54 'display_id' : display_id
,
55 'description' : self
._ og
_ search
_ description
( webpage
),
56 'timestamp' : timestamp
,
57 'location' : self
._ html
_ search
_ regex
( r
'Location: (.*)' , webpage
, 'location' , default
= None ),
59 'view_count' : view_count
,
60 'categories' : re
. findall ( r
'<a href=".*?" class="channel">(.*?)</a>' , webpage
),