--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewIE):  # Extractor for b-ch.com title pages; subclasses BrightcoveNewIE to reuse its metadata parsing.
+ IE_NAME = 'bandaichannel'
+ _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'  # the id group is the "<digits>/<digits>" path that follows /titles/
+ _TESTS = [{
+ 'url': 'https://www.b-ch.com/titles/514/001',
+ 'md5': 'a0f2d787baa5729bed71108257f613a4',
+ 'info_dict': {
+ 'id': '6128044564001',
+ 'ext': 'mp4',
+ 'title': 'メタルファイターMIKU 第1話',
+ 'timestamp': 1580354056,
+ 'uploader_id': '5797077852001',
+ 'upload_date': '20200130',
+ 'duration': 1387.733,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):  # Download the page, read the player tag's data attributes, query the playback-info API and parse its 'bc' object.
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ attrs = extract_attributes(self._search_regex(  # mapping built from the <video-js ... id="bcplayer"> tag found in the page
+ r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+ bc = self._download_json(
+ 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],  # data-info is appended to the API path; a missing attribute raises KeyError
+ video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']  # data-auth, whitespace-stripped, is sent as the X-API-KEY header
+ return self._parse_brightcove_metadata(bc, bc['id'])  # bc['id'] is used as the video id; presumably the Brightcove id — confirm against BrightcoveNewIE
from __future__ import unicode_literals
-import re
+from .zdf import ZDFIE
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
- xpath_text,
- determine_ext,
- float_or_none,
- ExtractorError,
-)
-
-class DreiSatIE(InfoExtractor):
+class DreiSatIE(ZDFIE):
IE_NAME = '3sat'
- _GEO_COUNTRIES = ['DE']
- _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
- _TESTS = [
- {
- 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
- 'md5': 'be37228896d30a88f315b638900a026e',
- 'info_dict': {
- 'id': '45918',
- 'ext': 'mp4',
- 'title': 'Waidmannsheil',
- 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
- 'uploader': 'SCHWEIZWEIT',
- 'uploader_id': '100000210',
- 'upload_date': '20140913'
- },
- 'params': {
- 'skip_download': True, # m3u8 downloads
- }
+ _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
+ 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
},
- {
- 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
- 'only_matching': True,
+ }, {
+ 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
+ 'info_dict': {
+ 'id': '140913_sendung_schweizweit',
+ 'ext': 'mp4',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+ 'timestamp': 1410623100,
+ 'upload_date': '20140913'
},
- ]
-
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- param_groups = {}
- for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
- group_id = param_group.get(self._xpath_ns(
- 'id', 'http://www.w3.org/XML/1998/namespace'))
- params = {}
- for param in param_group:
- params[param.get('name')] = param.get('value')
- param_groups[group_id] = params
-
- formats = []
- for video in smil.findall(self._xpath_ns('.//video', namespace)):
- src = video.get('src')
- if not src:
- continue
- bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
- group_id = video.get('paramGroup')
- param_group = param_groups[group_id]
- for proto in param_group['protocols'].split(','):
- formats.append({
- 'url': '%s://%s' % (proto, param_group['host']),
- 'app': param_group['app'],
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': '%s-%d' % (proto, bitrate),
- 'tbr': bitrate,
- })
- self._sort_formats(formats)
- return formats
-
- def extract_from_xml_url(self, video_id, xml_url):
- doc = self._download_xml(
- xml_url, video_id,
- note='Downloading video info',
- errnote='Failed to download video info')
-
- status_code = xpath_text(doc, './status/statuscode')
- if status_code and status_code != 'ok':
- if status_code == 'notVisibleAnymore':
- message = 'Video %s is not available' % video_id
- else:
- message = '%s returned error: %s' % (self.IE_NAME, status_code)
- raise ExtractorError(message, expected=True)
-
- title = xpath_text(doc, './/information/title', 'title', True)
-
- urls = []
- formats = []
- for fnode in doc.findall('.//formitaeten/formitaet'):
- video_url = xpath_text(fnode, 'url')
- if not video_url or video_url in urls:
- continue
- urls.append(video_url)
-
- is_available = 'http://www.metafilegenerator' not in video_url
- geoloced = 'static_geoloced_online' in video_url
- if not is_available or geoloced:
- continue
-
- format_id = fnode.attrib['basetype']
- format_m = re.match(r'''(?x)
- (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
- (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
- ''', format_id)
-
- ext = determine_ext(video_url, None) or format_m.group('container')
-
- if ext == 'meta':
- continue
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- video_url, video_id, fatal=False))
- elif ext == 'm3u8':
- # the certificates are misconfigured (see
- # https://github.com/ytdl-org/youtube-dl/issues/8665)
- if video_url.startswith('https://'):
- continue
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=format_id, fatal=False))
- else:
- quality = xpath_text(fnode, './quality')
- if quality:
- format_id += '-' + quality
-
- abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
- vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
-
- tbr = int_or_none(self._search_regex(
- r'_(\d+)k', video_url, 'bitrate', None))
- if tbr and vbr and not abr:
- abr = tbr - vbr
-
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'ext': ext,
- 'acodec': format_m.group('acodec'),
- 'vcodec': format_m.group('vcodec'),
- 'abr': abr,
- 'vbr': vbr,
- 'tbr': tbr,
- 'width': int_or_none(xpath_text(fnode, './width')),
- 'height': int_or_none(xpath_text(fnode, './height')),
- 'filesize': int_or_none(xpath_text(fnode, './filesize')),
- 'protocol': format_m.group('proto').lower(),
- })
-
- geolocation = xpath_text(doc, './/details/geolocation')
- if not formats and geolocation and geolocation != 'none':
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
-
- self._sort_formats(formats)
-
- thumbnails = []
- for node in doc.findall('.//teaserimages/teaserimage'):
- thumbnail_url = node.text
- if not thumbnail_url:
- continue
- thumbnail = {
- 'url': thumbnail_url,
- }
- thumbnail_key = node.get('key')
- if thumbnail_key:
- m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
- if m:
- thumbnail['width'] = int(m.group(1))
- thumbnail['height'] = int(m.group(2))
- thumbnails.append(thumbnail)
-
- upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': xpath_text(doc, './/information/detail'),
- 'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
- 'thumbnails': thumbnails,
- 'uploader': xpath_text(doc, './/details/originChannelTitle'),
- 'uploader_id': xpath_text(doc, './/details/originChannelId'),
- 'upload_date': upload_date,
- 'formats': formats,
+ 'params': {
+ 'skip_download': True,
}
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
- return self.extract_from_xml_url(video_id, details_url)
+ }, {
+ # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
+ 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }]
)
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
+from .bandaichannel import BandaiChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import (
BBCCoUkIE,
+# coding: utf-8
from __future__ import unicode_literals
-from .common import InfoExtractor
-from ..utils import ExtractorError
+import re
+from .youtube import YoutubeIE
+from .zdf import ZDFBaseIE
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ unified_timestamp,
+ xpath_text,
+)
-class PhoenixIE(InfoExtractor):
+
+class PhoenixIE(ZDFBaseIE):
IE_NAME = 'phoenix.de'
- _VALID_URL = r'''https?://(?:www\.)?phoenix.de/\D+(?P<id>\d+)\.html'''
- _TESTS = [
- {
- 'url': 'https://www.phoenix.de/sendungen/dokumentationen/unsere-welt-in-zukunft---stadt-a-1283620.html',
- 'md5': '5e765e838aa3531c745a4f5b249ee3e3',
- 'info_dict': {
- 'id': '0OB4HFc43Ns',
- 'ext': 'mp4',
- 'title': 'Unsere Welt in Zukunft - Stadt',
- 'description': 'md5:9bfb6fd498814538f953b2dcad7ce044',
- 'upload_date': '20190912',
- 'uploader': 'phoenix',
- 'uploader_id': 'phoenix',
- }
+ _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
+ 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613906100,
+ 'upload_date': '20210221',
+ 'uploader': 'Phoenix',
+ 'channel': 'corona nachgehakt',
},
- {
- 'url': 'https://www.phoenix.de/drohnenangriffe-in-saudi-arabien-a-1286995.html?ref=aktuelles',
- 'only_matching': True,
+ }, {
+ # Youtube embed
+ 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
+ 'info_dict': {
+ 'id': 'hMQtqFYjomk',
+ 'ext': 'mp4',
+ 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
+ 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
+ 'duration': 3509,
+ 'upload_date': '20201219',
+ 'uploader': 'phoenix',
+ 'uploader_id': 'phoenix',
},
- # an older page: https://www.phoenix.de/sendungen/gespraeche/phoenix-persoenlich/im-dialog-a-177727.html
- # seems to not have an embedded video, even though it's uploaded on youtube: https://www.youtube.com/watch?v=4GxnoUHvOkM
- ]
-
- def extract_from_json_api(self, video_id, api_url):
- doc = self._download_json(
- api_url, video_id,
- note="Downloading webpage metadata",
- errnote="Failed to load webpage metadata")
-
- for a in doc["absaetze"]:
- if a["typ"] == "video-youtube":
- return {
- '_type': 'url_transparent',
- 'id': a["id"],
- 'title': doc["titel"],
- 'url': "https://www.youtube.com/watch?v=%s" % a["id"],
- 'ie_key': 'Youtube',
- }
- raise ExtractorError("No downloadable video found", expected=True)
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
+ 'only_matching': True,
+ }, {
+ # no media
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):  # Resolve a phoenix.de article either to a YouTube url_result or to an info dict built from the details XML plus the PTMD document.
- page_id = self._match_id(url)
- api_url = 'https://www.phoenix.de/response/id/%s' % page_id
- return self.extract_from_json_api(page_id, api_url)
+ article_id = self._match_id(url)
+
+ article = self._download_json(
+ 'https://www.phoenix.de/response/id/%s' % article_id, article_id,
+ 'Downloading article JSON')
+
+ video = article['absaetze'][0]  # only the first 'absaetze' entry is inspected; a missing key or empty list raises
+ title = video.get('titel') or article.get('subtitel')
+
+ if video.get('typ') == 'video-youtube':  # YouTube embed: delegate to YoutubeIE using the embedded id
+ video_id = video['id']
+ return self.url_result(
+ video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
+ video_title=title)
+
+ video_id = compat_str(video.get('basename') or video.get('content'))  # NOTE(review): presumably yields the string 'None' when both keys are absent — confirm behaviour for pages without media
+
+ details = self._download_xml(
+ 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
+ video_id, 'Downloading details XML', query={
+ 'ak': 'web',
+ 'ptmd': 'true',
+ 'id': video_id,
+ 'profile': 'player2',
+ })
+
+ title = title or xpath_text(  # fall back to the XML title (fatal lookup) when the article JSON supplied none
+ details, './/information/title', 'title', fatal=True)
+ content_id = xpath_text(
+ details, './/video/details/basename', 'content id', fatal=True)
+
+ info = self._extract_ptmd(  # called with api_token=None and the page URL as referrer
+ 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
+ content_id, None, url)
+
+ timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))
+
+ thumbnails = []
+ for node in details.findall('.//teaserimages/teaserimage'):
+ thumbnail_url = node.text
+ if not thumbnail_url:  # skip teaser images with no URL text
+ continue
+ thumbnail = {
+ 'url': thumbnail_url,
+ }
+ thumbnail_key = node.get('key')
+ if thumbnail_key:
+ m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)  # a key of the form "<width>x<height>" supplies the dimensions
+ if m:
+ thumbnail['width'] = int(m.group(1))
+ thumbnail['height'] = int(m.group(2))
+ thumbnails.append(thumbnail)
+
+ return merge_dicts(info, {  # combines the PTMD info with the XML metadata; presumably values already present in `info` win — confirm merge_dicts precedence
+ 'id': content_id,
+ 'title': title,
+ 'description': xpath_text(details, './/information/detail'),
+ 'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'uploader': xpath_text(details, './/details/channel'),
+ 'uploader_id': xpath_text(details, './/details/originChannelId'),
+ 'channel': xpath_text(details, './/details/originChannelTitle'),
+ })
from .srgssr import SRGSSRIE
from ..compat import compat_str
from ..utils import (
+ determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
- determine_ext,
+ urljoin,
)
_TESTS = [
{
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
- 'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
+ 'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': {
'id': '3449373',
'display_id': 'les-enfants-terribles',
'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
},
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
# m3u8 download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
- 'md5': '1bae984fe7b1f78e94abc74e802ed99f',
+ 'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': {
'id': '5745356',
'display_id': 'londres-cachee-par-un-epais-smog',
'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
media_type = 'video' if 'video' in all_info else 'audio'
# check for errors
- self.get_media_data('rts', media_type, media_id)
+ self._get_media_data('rts', media_type, media_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
'tbr': extract_bitrate(format_url),
})
+ download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
for media in info.get('media', []):
media_url = media.get('url')
if not media_url or re.match(r'https?://', media_url):
format_id += '-%dk' % rate
formats.append({
'format_id': format_id,
- 'url': 'http://download-video.rts.ch/' + media_url,
+ 'url': urljoin(download_base, media_url),
'tbr': rate or extract_bitrate(media_url),
})
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
+ float_or_none,
+ int_or_none,
parse_iso8601,
qualities,
+ try_get,
)
class SRGSSRIE(InfoExtractor):
- _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
+ srgssr
+ ):
+ (?P<bu>
+ srf|rts|rsi|rtr|swi
+ ):(?:[^:]+:)?
+ (?P<type>
+ video|audio
+ ):
+ (?P<id>
+ [0-9a-f\-]{36}|\d+
+ )
+ '''
_GEO_BYPASS = False
_GEO_COUNTRIES = ['CH']
'LEGAL': 'The video cannot be transmitted for legal reasons.',
'STARTDATE': 'This video is not yet available. Please try again later.',
}
+ _DEFAULT_LANGUAGE_CODES = {
+ 'srf': 'de',
+ 'rts': 'fr',
+ 'rsi': 'it',
+ 'rtr': 'rm',
+ 'swi': 'en',
+ }
def _get_tokenized_src(self, url, video_id, format_id):  # Return `url` with auth parameters from the tp.srgssr.ch token endpoint appended, or unchanged when none are obtained.
- sp = compat_urllib_parse_urlparse(url).path.split('/')
token = self._download_json(
- 'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
+ 'http://tp.srgssr.ch/akahd/token?acl=*',
video_id, 'Downloading %s token' % format_id, fatal=False) or {}  # non-fatal request; a falsy result is replaced by an empty dict
- auth_params = token.get('token', {}).get('authparams')
+ auth_params = try_get(token, lambda x: x['token']['authparams'])  # utils helper; presumably returns None when the nested lookup fails — confirm
if auth_params:
- url += '?' + auth_params
+ url += ('?' if '?' not in url else '&') + auth_params  # use '&' when the URL already carries a query string
return url
- def get_media_data(self, bu, media_type, media_id):
- media_data = self._download_json(
- 'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
- media_id)[media_type.capitalize()]
-
- if media_data.get('block') and media_data['block'] in self._ERRORS:
- message = self._ERRORS[media_data['block']]
- if media_data['block'] == 'GEOBLOCK':
+ def _get_media_data(self, bu, media_type, media_id):
+ query = {'onlyChapters': True} if media_type == 'video' else {}
+ full_media_data = self._download_json(
+ 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
+ % (bu, media_type, media_id),
+ media_id, query=query)['chapterList']
+ try:
+ media_data = next(
+ x for x in full_media_data if x.get('id') == media_id)
+ except StopIteration:
+ raise ExtractorError('No media information found')
+
+ block_reason = media_data.get('blockReason')
+ if block_reason and block_reason in self._ERRORS:
+ message = self._ERRORS[block_reason]
+ if block_reason == 'GEOBLOCK':
self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
def _real_extract(self, url):  # Build the info dict (formats, subtitles, metadata) for the media item addressed by `url`.
bu, media_type, media_id = re.match(self._VALID_URL, url).groups()  # the pattern's capturing groups, in order: bu, type, id
+ media_data = self._get_media_data(bu, media_type, media_id)
+ title = media_data['title']  # mandatory field: a missing key raises KeyError
- media_data = self.get_media_data(bu, media_type, media_id)
-
- metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
- title = metadata['title']
- description = metadata.get('description')
- created_date = media_data.get('createdDate') or metadata.get('createdDate')
- timestamp = parse_iso8601(created_date)
-
- thumbnails = [{
- 'id': image.get('id'),
- 'url': image['url'],
- } for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])]
-
- preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD'])
formats = []
- for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []):
- protocol = source.get('@protocol')
- for asset in source['url']:
- asset_url = asset['text']
- quality = asset['@quality']
- format_id = '%s-%s' % (protocol, quality)
- if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
- asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
- if protocol.startswith('HTTP-HDS'):
- formats.extend(self._extract_f4m_formats(
- asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
- media_id, f4m_id=format_id, fatal=False))
- elif protocol.startswith('HTTP-HLS'):
- formats.extend(self._extract_m3u8_formats(
- asset_url, media_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
- else:
- formats.append({
- 'format_id': format_id,
- 'url': asset_url,
- 'quality': preference(quality),
- 'ext': 'flv' if protocol == 'RTMP' else None,
- })
+ q = qualities(['SD', 'HD'])  # utils helper; presumably yields a ranking function placing 'HD' above 'SD' — confirm
+ for source in (media_data.get('resourceList') or []):
+ format_url = source.get('url')
+ if not format_url:  # resources without a URL are skipped
+ continue
+ protocol = source.get('protocol')
+ quality = source.get('quality')
+ format_id = []
+ for e in (protocol, source.get('encoding'), quality):  # format id = the non-empty parts of protocol/encoding/quality joined with '-'
+ if e:
+ format_id.append(e)
+ format_id = '-'.join(format_id)
+
+ if protocol in ('HDS', 'HLS'):
+ if source.get('tokenType') == 'AKAMAI':
+ format_url = self._get_tokenized_src(
+ format_url, media_id, format_id)
+ formats.extend(self._extract_akamai_formats(
+ format_url, media_id))
+ elif protocol == 'HLS':  # NOTE(review): indentation is not visible in this diff; presumably this pairs with the tokenType check above (HLS without an Akamai token) — confirm
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif protocol in ('HTTP', 'HTTPS'):  # direct URL: one format entry ranked through q()
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'quality': q(quality),
+ })
+
+ # This is needed because for audio medias the podcast url is usually
+ # always included, even if it is only an audio segment and not the
+ # whole episode.
+ if int_or_none(media_data.get('position')) == 0:  # podcast URLs are added only when 'position' is 0
+ for p in ('S', 'H'):
+ podcast_url = media_data.get('podcast%sdUrl' % p)  # reads the keys podcastSdUrl and podcastHdUrl
+ if not podcast_url:
+ continue
+ quality = p + 'D'
+ formats.append({
+ 'format_id': 'PODCAST-' + quality,
+ 'url': podcast_url,
+ 'quality': q(quality),
+ })
self._sort_formats(formats)
+ subtitles = {}
+ if media_type == 'video':  # subtitles are collected for video items only
+ for sub in (media_data.get('subtitleList') or []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu]  # fall back to the business unit's default language code
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
return {
'id': media_id,
'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'thumbnails': thumbnails,
+ 'description': media_data.get('description'),
+ 'timestamp': parse_iso8601(media_data.get('date')),
+ 'thumbnail': media_data.get('imageUrl'),
+ 'duration': float_or_none(media_data.get('duration'), 1000),  # presumably scales milliseconds to seconds — confirm float_or_none's second parameter
+ 'subtitles': subtitles,
'formats': formats,
}
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
- 'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
+ 'md5': '6db2226ba97f62ad42ce09783680046c',
'info_dict': {
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'ext': 'mp4',
'upload_date': '20130701',
'title': 'Snowden beantragt Asyl in Russland',
- 'timestamp': 1372713995,
- }
- }, {
- # No Speichern (Save) button
- 'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
- 'md5': '0a274ce38fda48c53c01890651985bc6',
- 'info_dict': {
- 'id': '677f5829-e473-4823-ac83-a1087fe97faa',
- 'ext': 'flv',
- 'upload_date': '20130710',
- 'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
- 'description': 'md5:88604432b60d5a38787f152dec89cd56',
- 'timestamp': 1373493600,
+ 'timestamp': 1372708215,
+ 'duration': 113.827,
+ 'thumbnail': r're:^https?://.*1383719781\.png$',
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
'info_dict': {
'ext': 'mp3',
'upload_date': '20151013',
'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
- 'timestamp': 1444750398,
+ 'timestamp': 1444709160,
+ 'duration': 336.816,
},
'params': {
# rtmp download
'id': '6348260',
'display_id': '6348260',
'ext': 'mp4',
- 'duration': 1796,
+ 'duration': 1796.76,
'title': 'Le 19h30',
- 'description': '',
- 'uploader': '19h30',
'upload_date': '20141201',
'timestamp': 1417458600,
'thumbnail': r're:^https?://.*\.image',
- 'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
}
+ }, {
+ 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
+ 'info_dict': {
+ 'id': '42960270',
+ 'ext': 'mp4',
+ 'title': 'Why people were against tax reforms',
+ 'description': 'md5:7ac442c558e9630e947427469c4b824d',
+ 'duration': 94.0,
+ 'upload_date': '20170215',
+ 'timestamp': 1487173560,
+ 'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
+ 'subtitles': 'count:9',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
}, {
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
'only_matching': True,
}, {
'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
'only_matching': True,
+ }, {
+ # audio segment, has podcastSdUrl of the full episode
+ 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
+ 'only_matching': True,
}]
def _real_extract(self, url):
bu = mobj.group('bu')
media_type = mobj.group('type') or mobj.group('type_2')
media_id = mobj.group('id')
- # other info can be extracted from url + '&layout=json'
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
class StretchInternetIE(InfoExtractor):
'info_dict': {
'id': '573272',
'ext': 'mp4',
- 'title': 'University of Mary Wrestling vs. Upper Iowa',
- 'timestamp': 1575668361,
- 'upload_date': '20191206',
+ 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
+ # 'timestamp': 1575668361,
+ # 'upload_date': '20191206',
+ 'uploader_id': '99997',
}
}
def _real_extract(self, url):  # Issues two JSON requests: one for the media URL, one for the event metadata (title, owner id).
video_id = self._match_id(url)
+ media_url = self._download_json(
+ 'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
+ video_id)[0]['media'][0]['url']  # URL of the first media entry of the first list item; any missing level raises
event = self._download_json(
- 'https://api.stretchinternet.com/trinity/event/tcg/' + video_id,
- video_id)[0]
+ 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
+ video_id, query={'eventID': video_id, 'token': 'asdf'})['event']  # NOTE(review): 'asdf' looks like a placeholder token — confirm the endpoint accepts arbitrary values
return {
'id': video_id,
'title': event['title'],
- 'timestamp': int_or_none(event.get('dateCreated'), 1000),
- 'url': 'https://' + event['media'][0]['url'],
+ # TODO: parse US timezone abbreviations
+ # 'timestamp': event.get('dateTimeString'),
+ 'url': 'https://' + media_url,  # a scheme is prepended; presumably the API value comes without one — confirm
+ 'uploader_id': event.get('ownerID'),
}
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
'timestamp': 1513292400,
'upload_date': '20171214',
+ 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
+ 'duration': 2269,
+ 'categories': ['Kultur & historia'],
+ 'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
+ 'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
},
}, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
'timestamp': 1440086400,
'upload_date': '20150820',
+ 'series': 'Tripp, Trapp, Träd',
+ 'duration': 865,
+ 'tags': ['Sova'],
+ 'episode': 'Sovkudde',
},
}, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
video_id = self._match_id(url)
url = url.replace('skola.se/Produkter', 'play.se/program')
webpage = self._download_webpage(url, video_id)
- urplayer_data = self._parse_json(self._html_search_regex(
+ vid = int(video_id)
+ accessible_episodes = self._parse_json(self._html_search_regex(
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
- webpage, 'urplayer data'), video_id)['accessibleEpisodes'][0]
+ webpage, 'urplayer data'), video_id)['accessibleEpisodes']
+ urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
episode = urplayer_data['title']
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
'https://www.vvvvid.it/user/login',
None, headers=self.geo_verification_headers())['data']['conn_id']
- def _download_info(self, show_id, path, video_id, fatal=True):
+ def _download_info(self, show_id, path, video_id, fatal=True, query=None):
+ q = {
+ 'conn_id': self._conn_id,
+ }
+ if query:
+ q.update(query)
response = self._download_json(
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
- video_id, headers=self.geo_verification_headers(), query={
- 'conn_id': self._conn_id,
- }, fatal=fatal)
+ video_id, headers=self.geo_verification_headers(), query=q, fatal=fatal)
if not (response or fatal):
return
if response.get('result') == 'error':
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_info(
- show_id, 'season/%s' % season_id, video_id)
+ show_id, 'season/%s' % season_id,
+ video_id, query={'video_id': video_id})
vid = int(video_id)
video_data = list(filter(
show_info = self._download_info(
show_id, 'info/', show_title, fatal=False)
+ if not show_title:
+ base_url += "/title"
+
entries = []
for season in (seasons or []):
episodes = season.get('episodes') or []
+ playlist_title = season.get('name') or show_info.get('title')
for episode in episodes:
if episode.get('playable') is False:
continue
continue
info = self._extract_common_video_info(episode)
info.update({
- '_type': 'url',
+ '_type': 'url_transparent',
'ie_key': VVVVIDIE.ie_key(),
'url': '/'.join([base_url, season_id, video_id]),
'title': episode.get('title'),
'description': episode.get('description'),
'season_id': season_id,
+ 'playlist_title': playlist_title,
})
entries.append(info)
from ..compat import compat_str
from ..utils import (
determine_ext,
+ float_or_none,
int_or_none,
+ merge_dicts,
NO_DEFAULT,
orderedSet,
parse_codecs,
class ZDFBaseIE(InfoExtractor):
- def _call_api(self, url, player, referrer, video_id, item):
- return self._download_json(
- url, video_id, 'Downloading JSON %s' % item,
- headers={
- 'Referer': referrer,
- 'Api-Auth': 'Bearer %s' % player['apiToken'],
- })
-
- def _extract_player(self, webpage, video_id, fatal=True):
- return self._parse_json(
- self._search_regex(
- r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
- 'player JSON', default='{}' if not fatal else NO_DEFAULT,
- group='json'),
- video_id)
-
-
-class ZDFIE(ZDFBaseIE):
- IE_NAME = "ZDF-3sat"
- _VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
- _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
_GEO_COUNTRIES = ['DE']
+ _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
- _TESTS = [{
- 'url': 'https://www.3sat.de/wissen/wissenschaftsdoku/luxusgut-lebensraum-100.html',
- 'info_dict': {
- 'id': 'luxusgut-lebensraum-100',
- 'ext': 'mp4',
- 'title': 'Luxusgut Lebensraum',
- 'description': 'md5:5c09b2f45ac3bc5233d1b50fc543d061',
- 'duration': 2601,
- 'timestamp': 1566497700,
- 'upload_date': '20190822',
- }
- }, {
- 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
- 'info_dict': {
- 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
- 'ext': 'mp4',
- 'title': 'Die Magie der Farben (2/2)',
- 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
- 'duration': 2615,
- 'timestamp': 1465021200,
- 'upload_date': '20160604',
- },
- }, {
- 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
- 'only_matching': True,
- }]
+ def _call_api(self, url, video_id, item, api_token=None, referrer=None):  # Download JSON from `url`; the Api-Auth and Referer headers are sent only when the matching argument is truthy.
+ headers = {}
+ if api_token:
+ headers['Api-Auth'] = 'Bearer %s' % api_token
+ if referrer:
+ headers['Referer'] = referrer
+ return self._download_json(
+ url, video_id, 'Downloading JSON %s' % item, headers=headers)  # `item` is only interpolated into the download note
@staticmethod
def _extract_subtitles(src):
})
formats.append(f)
- def _extract_entry(self, url, player, content, video_id):
- title = content.get('title') or content['teaserHeadline']
-
- t = content['mainVideoContent']['http://zdf.de/rels/target']
-
- ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
-
- if not ptmd_path:
- ptmd_path = t[
- 'http://zdf.de/rels/streams/ptmd-template'].replace(
- '{playerId}', 'ngplayer_2_4')
-
+ def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
ptmd = self._call_api(
- urljoin(url, ptmd_path), player, url, video_id, 'metadata')
+ ptmd_url, video_id, 'metadata', api_token, referrer)
+
+ content_id = ptmd.get('basename') or ptmd_url.split('/')[-1]
formats = []
track_uris = set()
continue
for track in tracks:
self._extract_format(
- video_id, formats, track_uris, {
+ content_id, formats, track_uris, {
'url': track.get('uri'),
'type': f.get('type'),
'mimeType': f.get('mimeType'),
})
self._sort_formats(formats)
+ duration = float_or_none(try_get(
+ ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
+
+ return {
+ 'extractor_key': ZDFIE.ie_key(),
+ 'id': content_id,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(ptmd),
+ }
+
+ def _extract_player(self, webpage, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
+ 'player JSON', default='{}' if not fatal else NO_DEFAULT,
+ group='json'),
+ video_id)
+
+
+class ZDFIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613948400,
+ 'upload_date': '20210221',
+ },
+ }, {
+ # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
+ 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+ 'info_dict': {
+ 'id': '151025_magie_farben2_tex',
+ 'ext': 'mp4',
+ 'title': 'Die Magie der Farben (2/2)',
+ 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+ 'duration': 2615,
+ 'timestamp': 1465021200,
+ 'upload_date': '20160604',
+ },
+ }, {
+ # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
+ 'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_entry(self, url, player, content, video_id):
+ title = content.get('title') or content['teaserHeadline']
+
+ t = content['mainVideoContent']['http://zdf.de/rels/target']
+
+ ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
+
+ if not ptmd_path:
+ ptmd_path = t[
+ 'http://zdf.de/rels/streams/ptmd-template'].replace(
+ '{playerId}', 'ngplayer_2_4')
+
+ info = self._extract_ptmd(
+ urljoin(url, ptmd_path), video_id, player['apiToken'], url)
+
thumbnails = []
layouts = try_get(
content, lambda x: x['teaserImageRef']['layouts'], dict)
})
thumbnails.append(thumbnail)
- return {
- 'id': video_id,
+ return merge_dicts(info, {
'title': title,
'description': content.get('leadParagraph') or content.get('teasertext'),
'duration': int_or_none(t.get('duration')),
'timestamp': unified_timestamp(content.get('editorialDate')),
'thumbnails': thumbnails,
- 'subtitles': self._extract_subtitles(ptmd),
- 'formats': formats,
- }
+ })
def _extract_regular(self, url, player, video_id):
content = self._call_api(
- player['content'], player, url, video_id, 'content')
+ player['content'], video_id, 'content', player['apiToken'], url)
return self._extract_entry(player['content'], player, content, video_id)
def _extract_mobile(self, video_id):
- document = self._download_json(
+ video = self._download_json(
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
- video_id)['document']
+ video_id)
+
+ document = video['document']
title = document['titel']
+ content_id = document['basename']
formats = []
format_urls = set()
for f in document['formitaeten']:
- self._extract_format(video_id, formats, format_urls, f)
+ self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats)
thumbnails = []
})
return {
- 'id': video_id,
+ 'id': content_id,
'title': title,
'description': document.get('beschreibung'),
'duration': int_or_none(document.get('length')),
- 'timestamp': unified_timestamp(try_get(
- document, lambda x: x['meta']['editorialDate'], compat_str)),
+ 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
+ try_get(video, lambda x: x['meta']['editorialDate'], compat_str)),
'thumbnails': thumbnails,
'subtitles': self._extract_subtitles(document),
'formats': formats,