]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/piksel.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
7 from .. compat
import compat_str
18 class PikselIE ( InfoExtractor
):
19 _VALID_URL
= r
'''(?x)https?://
27 (?:api|player)\.multicastmedia|
28 (?:api-ovp|player)\.piksel
34 vidego\.baltimorecity\.gov
35 )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
38 'url' : 'http://player.piksel.com/v/ums2867l' ,
39 'md5' : '34e34c8d89dc2559976a6079db531e85' ,
43 'title' : 'GX-005 with Caption' ,
44 'timestamp' : 1481335659 ,
45 'upload_date' : '20161210'
49 # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
50 'url' : 'https://player.piksel.com/v/v80kqp41' ,
51 'md5' : '753ddcd8cc8e4fa2dda4b7be0e77744d' ,
55 'title' : 'WAW- State of Washington vs. Donald J. Trump, et al' ,
56 'description' : 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.' ,
57 'timestamp' : 1486171129 ,
58 'upload_date' : '20170204'
62 # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
63 'url' : 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477' ,
64 'only_matching' : True ,
69 def _extract_url ( webpage
):
71 r
'<iframe[^>]+src=["\' ]( ?P
< url
>( ?
: https?
:) ?
// player\
. piksel\
. com
/ v
/[ a
- z0
- 9 ]+) ',
74 return mobj.group(' url
')
76 def _call_api(self, app_token, resource, display_id, query, fatal=True):
77 response = (self._download_json(
78 ' http
:// player
. piksel
. com
/ ws
/ ws_
%s/ api
/ %s/ mode
/ json
/ apiv
/ 5 ' % (resource, app_token),
79 display_id, query=query, fatal=fatal) or {}).get(' response
')
80 failure = try_get(response, lambda x: x[' failure
'][' reason
'])
83 raise ExtractorError(failure, expected=True)
84 self.report_warning(failure)
87 def _real_extract(self, url):
88 ref_id, display_id = self._match_valid_url(url).groups()
89 webpage = self._download_webpage(url, display_id)
90 app_token = self._search_regex([
91 r' clientAPI\s
*: \s
* "([^" ]+) "',
92 r'data-de-api-key\s*=\s*" ([ ^
"]+)" '
93 ], webpage, ' app token
')
94 query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
95 program = self._call_api(
96 app_token, ' program
', display_id, query)[' WsProgramResponse
'][' program
']
97 video_id = program[' uuid
']
98 video_data = program[' asset
']
99 title = video_data[' title
']
100 asset_type = dict_get(video_data, [' assetType
', ' asset_type
'])
104 def process_asset_file(asset_file):
107 # TODO: extract rtmp formats
108 http_url = asset_file.get(' http_url
')
112 vbr = int_or_none(asset_file.get(' videoBitrate
'), 1024)
113 abr = int_or_none(asset_file.get(' audioBitrate
'), 1024)
114 if asset_type == ' video
':
116 elif asset_type == ' audio
':
121 format_id.append(compat_str(tbr))
124 ' format_id
': ' - '.join(format_id),
125 ' url
': unescapeHTML(http_url),
128 ' width
': int_or_none(asset_file.get(' videoWidth
')),
129 ' height
': int_or_none(asset_file.get(' videoHeight
')),
130 ' filesize
': int_or_none(asset_file.get(' filesize
')),
134 def process_asset_files(asset_files):
135 for asset_file in (asset_files or []):
136 process_asset_file(asset_file)
138 process_asset_files(video_data.get(' assetFiles
'))
139 process_asset_file(video_data.get(' referenceFile
'))
141 asset_id = video_data.get(' assetid
') or program.get(' assetid
')
143 process_asset_files(try_get(self._call_api(
144 app_token, ' asset_file
', display_id, {
146 }, False), lambda x: x[' WsAssetFileResponse
'][' AssetFiles
']))
148 m3u8_url = dict_get(video_data, [
155 formats.extend(self._extract_m3u8_formats(
156 m3u8_url, video_id, ' mp4
', ' m3u8_native
',
157 m3u8_id=' hls
', fatal=False))
159 smil_url = dict_get(video_data, [' httpSmil
', ' hdSmil
', ' rtmpSmil
'])
161 transform_source = None
162 if ref_id == ' nhkworld
':
163 # TODO: figure out if this is something to be fixed in urljoin,
164 # _parse_smil_formats or keep it here
165 transform_source = lambda x: x.replace(' src
= "/', 'src=" ').replace(' / media
"', '/media/" ')
166 formats.extend(self._extract_smil_formats(
167 re.sub(r' / od
/[ ^
/]+/ ', ' / od
/ http
/ ', smil_url), video_id,
168 transform_source=transform_source, fatal=False))
170 self._sort_formats(formats, (' tbr
', )) # Incomplete resolution information
173 for caption in video_data.get(' captions
', []):
174 caption_url = caption.get(' url
')
176 subtitles.setdefault(caption.get(' locale
', ' en
'), []).append({
182 ' description
': video_data.get(' description
'),
183 ' thumbnail
': video_data.get(' thumbnailUrl
'),
184 ' timestamp
': parse_iso8601(video_data.get(' dateadd
')),
186 ' subtitles
': subtitles,