# jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/brightcove.py
3 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
class BrightcoveIE(InfoExtractor):
    """Information extractor for Brightcove viewer/services URLs.

    A URL whose query string carries a numeric ``videoPlayer`` id is handled
    as a single video; otherwise the ``playerKey`` found in the query string
    is used to fetch a playlist.
    """

    # Matches brightcove.com "services" or "viewer" URLs and captures
    # everything after the "?" in the named group "query".
    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
    # Filled in with a complete query string.
    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
    # Filled in with a player key.
    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'

    # Brightcove is tested through GenericIE: that way both the detection of
    # embedded videos and the download are exercised, and no URL that has to
    # stay valid forever needs to be maintained here.

    @classmethod
    def _build_brighcove_url(cls, object_str):
        """
        Build a Brightcove url from an xml string containing
        <object class="BrightcoveExperience">{params}</object>

        The object's ``id`` attribute and its ``playerID`` param are
        required; the ``playerKey`` and ``@videoPlayer`` params are copied
        only when the object defines them.  Returns the federated viewer
        URL with those values url-encoded into its query string.
        """
        root = xml.etree.ElementTree.fromstring(object_str)
        assert root.attrib['class'] == u'BrightcoveExperience'

        query_params = {}
        query_params['flashID'] = root.attrib['id']
        query_params['playerID'] = root.find('./param[@name="playerID"]').attrib['value']

        # Not all pages define these values, so they are looked up leniently.
        optional_params = (
            ('playerKey', './param[@name="playerKey"]'),
            ('@videoPlayer', './param[@name="@videoPlayer"]'),
        )
        for param_name, param_path in optional_params:
            node = root.find(param_path)
            if node is not None:
                query_params[param_name] = node.attrib['value']

        return cls._FEDERATED_URL_TEMPLATE % compat_urllib_parse.urlencode(query_params)

    def _real_extract(self, url):
        """Extract a single video or a playlist from a Brightcove URL.

        The query string of ``url`` decides which: a numeric ``videoPlayer``
        value selects single-video extraction, otherwise the ``playerKey``
        value is extracted and used to fetch a playlist.
        """
        query = re.match(self._VALID_URL, url).group('query')

        video_match = re.search(r'videoPlayer=(\d+)', query)
        if video_match is None:
            # No video id in the query: fall back to the player's playlist.
            player_key = self._search_regex(r'playerKey=(.+?)(&|$)', query, 'playlist_id')
            return self._get_playlist_info(player_key)

        return self._get_video_info(video_match.group(1), query)

    def _get_video_info(self, video_id, query):
        """Download the federated viewer page and extract its video.

        ``query`` is appended verbatim to the federated viewer URL.  The
        page's ``experienceJSON`` variable is parsed, and the ``mediaDTO``
        found under data -> programmedContent -> videoPlayer is handed to
        ``_extract_video_info``.
        """
        page = self._download_webpage(self._FEDERATED_URL_TEMPLATE % query, video_id)

        self.report_extraction(video_id)
        raw_json = self._search_regex(r'var experienceJSON = ({.*?});', page, 'json')
        experience = json.loads(raw_json)['data']
        media = experience['programmedContent']['videoPlayer']['mediaDTO']

        return self._extract_video_info(media)

    def _get_playlist_info(self, player_key):
        """Download the playlist for ``player_key`` and return a playlist result.

        Every entry of the response's videoList -> mediaCollectionDTO ->
        videoDTOs is converted with ``_extract_video_info``; the playlist
        id and title come from the same ``videoList`` object.
        """
        raw_json = self._download_webpage(
            self._PLAYLIST_URL_TEMPLATE % player_key,
            player_key, u'Downloading playlist information')

        video_list = json.loads(raw_json)['videoList']
        collection = video_list['mediaCollectionDTO']
        entries = [self._extract_video_info(dto) for dto in collection['videoDTOs']]

        return self.playlist_result(
            entries,
            playlist_id=video_list['id'],
            playlist_title=collection['displayName'])

    def _extract_video_info(self, video_info):
        """Turn a Brightcove video description dict into an info dict.

        The rendition with the largest ``size`` supplies the URL (on ties
        the one listed last is used, because the sort is stable).  ``id``,
        ``displayName`` and ``renditions`` are required keys; description,
        thumbnail and uploader are ``None`` when absent.
        """
        by_size = list(video_info['renditions'])
        by_size.sort(key=lambda rendition: rendition['size'])
        largest = by_size[-1]

        return {
            'id': video_info['id'],
            'title': video_info['displayName'],
            'url': largest['defaultURL'],
            'description': video_info.get('shortDescription'),
            # Prefer the video still, fall back to the plain thumbnail.
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
            'uploader': video_info.get('publisherName'),
        }