jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/bbccouk.py
[bbc.co.uk] Fix regex
[yt-dlp.git] / youtube_dl / extractor / bbccouk.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import ExtractorError
7
8
class BBCCoUkIE(InfoExtractor):
    """Extractor for bbc.co.uk programme and iPlayer episode pages.

    Extraction takes two XML requests: the iPlayer playlist for the
    8-character id found in the page URL, then the media selector document
    for the version identifier named by the playlist item.
    """
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC - iPlayer Radio'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    # XML namespaces in ElementTree's "{uri}" prefix form.
    _PLAYLIST_NS = '{http://bbc.co.uk/2008/emp/playlist}'
    _MEDIASELECTION_NS = '{http://bbc.co.uk/2008/mp/mediaselection}'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
        'info_dict': {
            # The returned id is the playlist item's identifier, which
            # differs from the id in the URL.
            'id': 'p01q7wz4',
            'ext': 'flv',
            'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
            'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
            'duration': 1936,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    @staticmethod
    def _int_or_none(value):
        """Return ``int(value)``, or None if value is missing or not an integer."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _child_text(node, path):
        """Return the text of the first child matching path, or None if absent."""
        child = node.find(path)
        return None if child is None else child.text

    def _raise_unavailable(self, playlist, group_id):
        """Raise an ExtractorError explaining why the playlist has no item.

        The message is derived from the ``reason`` attribute of the
        playlist's ``noItems`` element when that element is present.
        """
        no_items = playlist.find('./%snoItems' % self._PLAYLIST_NS)
        if no_items is None:
            raise ExtractorError(
                'Failed to extract media for episode %s' % group_id, expected=True)

        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % group_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % group_id
        else:
            msg = 'Episode %s is not available: %s' % (group_id, reason)
        raise ExtractorError(msg, expected=True)

    def _extract_formats(self, media_selection, programme_id):
        """Build the list of format dicts from a media selector document.

        Only the first ``connection`` child of each ``media`` element is
        used. Media entries without a connection, and connections whose
        protocol is neither ``http`` nor ``rtmp``, contribute no format.
        """
        ns = self._MEDIASELECTION_NS
        formats = []
        for media in media_selection.findall('./%smedia' % ns):
            connection = media.find('./%sconnection' % ns)
            if connection is None:
                # Nothing to connect to; skip rather than fail on .get().
                continue

            service = media.get('service')
            protocol = connection.get('protocol')
            # Fields shared by every format produced from this media entry.
            # Numeric attributes are converted so that formats are ordered
            # numerically instead of by string comparison.
            shared = {
                'abr': self._int_or_none(media.get('bitrate')),
                'acodec': media.get('encoding'),
                'preference': self._int_or_none(connection.get('priority')),
            }

            if protocol == 'http':
                href = connection.get('href')
                if connection.get('supplier') == 'asx':
                    # ASX playlist: one format per referenced stream.
                    asx = self._download_xml(
                        href, programme_id,
                        'Downloading %s ASX playlist' % service)
                    for i, ref in enumerate(asx.findall('./Entry/ref')):
                        fmt = dict(shared)
                        fmt.update({
                            'url': ref.get('href'),
                            'format_id': '%s_ref%s' % (service, i),
                        })
                        formats.append(fmt)
                    continue
                # Direct link
                fmt = dict(shared)
                fmt.update({
                    'url': href,
                    'format_id': service,
                })
                formats.append(fmt)
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                fmt = dict(shared)
                fmt.update({
                    'url': '%s://%s/%s?%s' % (
                        protocol, connection.get('server'), application, auth_string),
                    'play_path': connection.get('identifier'),
                    'app': '%s?%s' % (application, auth_string),
                    'rtmp_live': False,
                    'ext': 'flv',
                    'format_id': service,
                })
                formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Return the info dict for the programme or episode at url.

        Raises ExtractorError when the playlist contains no item (with the
        availability reason when the playlist provides one) or when the
        playlist has no title.
        """
        group_id = re.match(self._VALID_URL, url).group('id')

        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        item = playlist.find('./%sitem' % self._PLAYLIST_NS)
        if item is None:
            self._raise_unavailable(playlist, group_id)

        title = self._child_text(playlist, './%stitle' % self._PLAYLIST_NS)
        if not title:
            raise ExtractorError('Unable to extract title for episode %s' % group_id)
        # The summary is optional; None when the element is absent.
        description = self._child_text(playlist, './%ssummary' % self._PLAYLIST_NS)

        radio_programme_id = item.get('identifier')
        duration = self._int_or_none(item.get('duration'))

        media_selection = self._download_xml(
            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % radio_programme_id,
            radio_programme_id, 'Downloading media selection XML')

        formats = self._extract_formats(media_selection, radio_programme_id)
        self._sort_formats(formats)

        return {
            'id': radio_programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }