-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
int_or_none,
+ join_nonempty,
parse_iso8601,
+ traverse_obj,
try_get,
unescapeHTML,
+ urljoin,
)
)|
(?:api|player)\.multicastmedia|
(?:api-ovp|player)\.piksel
- )\.com|
+ )\.(?:com|tech)|
(?:
mz-edge\.stream\.co|
movie-s\.nhk\.or
)\.jp|
vidego\.baltimorecity\.gov
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)']
_TESTS = [
{
- 'url': 'http://player.piksel.com/v/ums2867l',
+ 'url': 'http://player.piksel.tech/v/ums2867l',
'md5': '34e34c8d89dc2559976a6079db531e85',
'info_dict': {
'id': 'ums2867l',
'ext': 'mp4',
'title': 'GX-005 with Caption',
'timestamp': 1481335659,
- 'upload_date': '20161210'
- }
+ 'upload_date': '20161210',
+ 'description': '',
+ 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480',
+ },
},
{
# Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
- 'url': 'https://player.piksel.com/v/v80kqp41',
+ 'url': 'https://player.piksel.tech/v/v80kqp41',
'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
'info_dict': {
'id': 'v80kqp41',
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
'timestamp': 1486171129,
- 'upload_date': '20170204'
- }
+ 'upload_date': '20170204',
+ 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360',
+ },
},
{
# https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
'only_matching': True,
- }
+ },
]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
- def _call_api(self, app_token, resource, display_id, query, fatal=True):
- response = (self._download_json(
- 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
- display_id, query=query, fatal=fatal) or {}).get('response')
- failure = try_get(response, lambda x: x['failure']['reason'])
+ def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True):
+ url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')
+ response = traverse_obj(
+ self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {}
+ failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API'
if failure:
if fatal:
raise ExtractorError(failure, expected=True)
webpage = self._download_webpage(url, display_id)
app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"',
- r'data-de-api-key\s*=\s*"([^"]+)"'
+ r'data-de-api-key\s*=\s*"([^"]+)"',
], webpage, 'app token')
query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
program = self._call_api(
- app_token, 'program', display_id, query)['WsProgramResponse']['program']
+ app_token, 'program', display_id, query, url)['WsProgramResponse']['program']
video_id = program['uuid']
video_data = program['asset']
title = video_data['title']
elif asset_type == 'audio':
tbr = abr
- format_id = ['http']
- if tbr:
- format_id.append(compat_str(tbr))
-
formats.append({
- 'format_id': '-'.join(format_id),
+ 'format_id': join_nonempty('http', tbr),
'url': unescapeHTML(http_url),
'vbr': vbr,
'abr': abr,
process_asset_files(try_get(self._call_api(
app_token, 'asset_file', display_id, {
'assetid': asset_id,
- }, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
+ }, url, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
m3u8_url = dict_get(video_data, [
'm3u8iPadURL',
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
if smil_url:
- transform_source = None
+ transform_source = lambda x: x.replace('src="/', 'src="')
if ref_id == 'nhkworld':
# TODO: figure out if this is something to be fixed in urljoin,
# _parse_smil_formats or keep it here
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
transform_source=transform_source, fatal=False))
- self._sort_formats(formats, ('tbr', )) # Incomplete resolution information
-
subtitles = {}
for caption in video_data.get('captions', []):
caption_url = caption.get('url')
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
'subtitles': subtitles,
+ '_format_sort_fields': ('tbr', ), # Incomplete resolution information
}