import itertools
import json
import re
-import urllib.error
import xml.etree.ElementTree
from .common import InfoExtractor
-from ..compat import compat_HTTPError, compat_str, compat_urlparse
+from ..compat import compat_str, compat_urlparse
+from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
float_or_none,
get_element_by_class,
int_or_none,
+ join_nonempty,
js_to_json,
parse_duration,
parse_iso8601,
parse_qs,
strip_or_none,
+ traverse_obj,
try_get,
unescapeHTML,
unified_timestamp,
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/|
- sounds/play/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
# rtmp download
'skip_download': True,
},
- }, {
- 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
- 'note': 'Audio',
- 'info_dict': {
- 'id': 'm0007jz9',
- 'ext': 'mp4',
- 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
- 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
- 'duration': 9840,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
headers={'Referer': self._LOGIN_URL})
- if self._LOGIN_URL in urlh.geturl():
+ if self._LOGIN_URL in urlh.url:
error = clean_html(get_element_by_class('form-message', response))
if error:
raise ExtractorError(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
- if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
- and e.exc_info[1].code in (403, 404)):
+ if not (isinstance(e.exc_info[1], HTTPError)
+ and e.exc_info[1].status in (403, 404)):
raise
fmts = []
formats.extend(fmts)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
- if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+ if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
raise
# fallback to legacy playlist
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
- self._sort_formats(formats)
-
return {
'id': programme_id,
'title': title,
'upload_date': '20190604',
'categories': ['Psychology'],
},
+ }, {
+ # BBC Sounds
+ 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+ 'info_dict': {
+ 'id': 'm001q789',
+ 'ext': 'mp4',
+ 'title': 'The Night Tracks Mix - Music for the darkling hour',
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
+ 'chapters': 'count:8',
+ 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
+ 'uploader': 'Radio 3',
+ 'duration': 1800,
+ 'uploader_id': 'bbc_radio_three',
+ },
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
'only_matching': True,
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
programme_id, title, description, duration, formats, subtitles = \
self._process_legacy_playlist_url(url, playlist_id)
- self._sort_formats(formats)
return {
'id': programme_id,
'title': title,
duration = int_or_none(items[0].get('duration'))
programme_id = items[0].get('vpid')
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': title,
# Some playlist URL may fail with 500, at the same time
# the other one may work fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 500:
continue
raise
if entry:
- self._sort_formats(entry['formats'])
entries.append(entry)
if entries:
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
digital_data = self._parse_json(
self._search_regex(
if version_id:
title = smp_data['title']
formats, subtitles = self._download_media_selector(version_id)
- self._sort_formats(formats)
image_url = smp_data.get('holdingImageURL')
display_date = init_data.get('displayDate')
topic_title = init_data.get('topicTitle')
continue
title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
description = lead_media.get('summary')
uploader = lead_media.get('masterBrand')
uploader_id = lead_media.get('mid')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
'uploader_id': network.get('id'),
'formats': formats,
'subtitles': subtitles,
+ 'chapters': traverse_obj(preload_state, (
+ 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
+ 'title': ('titles', {lambda x: join_nonempty(
+ 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+ 'start_time': ('offset', 'start', {float_or_none}),
+ 'end_time': ('offset', 'end', {float_or_none}),
+ })) or None,
}
bbc3_config = self._parse_json(
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
- self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
if not programme_id:
continue
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': playlist_title,
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
- self._sort_formats(formats)
item_desc = None
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
if blocks:
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
if not formats and not self.get_param('ignore_no_formats'):
continue
- self._sort_formats(formats)
video_id = media_meta.get('externalId')
if not video_id: