]>
jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/democracynow.py
2 from __future__
import unicode_literals
5 from . common
import InfoExtractor
8 class DemocracynowIE ( InfoExtractor
):
9 _VALID_URL
= r
'https?://(?:www\.)?democracynow.org/?(?P<id>[^\?]*)'
10 IE_NAME
= 'democracynow'
12 'url' : 'http://www.democracynow.org/shows/2015/7/3' ,
14 'id' : '2015-0703-001' ,
16 'title' : 'July 03, 2015 - Democracy Now!' ,
17 'description' : 'A daily independent global news hour with Amy Goodman & Juan Gonz \xe1 lez "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs' ,
18 'uploader' : 'Democracy Now' ,
22 'url' : 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' ,
24 'id' : '2015-0703-001' ,
26 'title' : '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag' ,
27 'description' : 'md5:4d2bc4f0d29f5553c2210a4bc7761a21' ,
28 'uploader' : 'Democracy Now' ,
33 def _real_extract ( self
, url
):
34 display_id
= self
._ match
_ id
( url
)
35 base_host
= re
. search ( r
'^(.+?://[^/]+)' , url
). group ( 1 )
38 webpage
= self
._ download
_ webpage
( url
, display_id
)
39 description
= self
._ og
_ search
_ description
( webpage
)
41 jstr
= self
._ search
_ regex
( r
'<script[^>]+type="text/json"[^>]*>\s*( {[^>]+} )' , webpage
, 'json' )
42 js
= self
._ parse
_ json
( jstr
, display_id
)
46 for key
in ( 'caption_file' , '.......' ):
47 # ....... = pending vtt support that doesn't clobber srt 'chapter_file':
49 if url
== '' or url
is None :
51 if not re
. match ( r
'^https?://' , url
):
53 ext
= re
. search ( r
'\.([^\.]+)$' , url
). group ( 1 )
58 for key
in ( 'file' , 'audio' , 'video' ):
60 if url
== '' or url
is None :
62 if not re
. match ( r
'^https?://' , url
):
64 purl
= re
. search ( r
'/(?P<dir>[^/]+)/(?:dn)?(?P<fn>[^/]+?)\.(?P<ext>[^\.\?]+)(?P<hasparams>\?|$)' , url
)
66 video_id
= purl
. group ( 'fn' )
67 if js
. get ( 'start' ) is not None :
68 url
+= '&' if purl
. group ( 'hasparams' ) == '?' else '?'
69 url
= url
+ 'start=' + str ( js
. get ( 'start' ))
71 'format_id' : purl
. group ( 'dir' ),
72 'ext' : purl
. group ( 'ext' ),
75 self
._ sort
_ formats
( formats
)
78 'title' : js
. get ( 'title' ),
79 'description' : description
,
80 'uploader' : 'Democracy Now' ,
81 'subtitles' : subtitles
,