]>
jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/orf.py
2 import xml
. etree
. ElementTree
5 from . common
import InfoExtractor
12 class ORFIE ( InfoExtractor
):
13 _VALID_URL
= r
'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
16 u
'url' : u
'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter' ,
17 u
'file' : u
'6566957.flv' ,
20 u
'description' : u
'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich. \r \r Mehr Wetter unter wetter.ORF.at' ,
24 u
'skip_download' : True ,
28 def _real_extract ( self
, url
):
29 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
30 playlist_id
= mobj
. group ( 'id' )
31 webpage
= self
._ download
_ webpage
( url
, playlist_id
)
33 flash_xml
= self
._ search
_ regex
( 'ORF.flashXML = \' (.+?) \' ' , webpage
, u
'flash xml' )
34 flash_xml
= compat_urlparse
. parse_qs ( 'xml=' + flash_xml
)[ 'xml' ][ 0 ]
35 flash_config
= xml
. etree
. ElementTree
. fromstring ( flash_xml
. encode ( 'utf-8' ))
36 playlist_json
= self
._ search
_ regex
( r
'playlist\' : \' ( \
[.* ?\
]) \' ', webpage, u' playlist
').replace(r' \" ',' "')
37 playlist = json.loads(playlist_json)
40 ns = ' {http://tempuri.org/XMLSchema.xsd} '
41 xpath = ' %(ns)s Playlist/ %(ns)s Items/ %(ns)s Item' % {'ns': ns}
42 webpage_description = self._og_search_description(webpage)
43 for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
44 # Get best quality url
46 for q in ['Q6A', 'Q4A', 'Q1A']:
47 video_url = find_xpath_attr(item, ' %s VideoUrl' % ns, 'quality', q)
48 if video_url is not None:
49 rtmp_url = video_url.text
52 raise ExtractorError(u'Couldn \' t get video url: %s ' % info['id'])
53 description = self._html_search_regex(
54 r'id=" playlist_entry_
%s ".*?<p>(.*?)</p>' % i, webpage,
55 u'description', default=webpage_description, flags=re.DOTALL)
59 'title': info['title'],
62 'description': description,