]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/piksel.py
3 from . common
import InfoExtractor
15 class PikselIE ( InfoExtractor
):
16 _VALID_URL
= r
'''(?x)https?://
24 (?:api|player)\.multicastmedia|
25 (?:api-ovp|player)\.piksel
31 vidego\.baltimorecity\.gov
32 )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
35 'url' : 'http://player.piksel.com/v/ums2867l' ,
36 'md5' : '34e34c8d89dc2559976a6079db531e85' ,
40 'title' : 'GX-005 with Caption' ,
41 'timestamp' : 1481335659 ,
42 'upload_date' : '20161210'
46 # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
47 'url' : 'https://player.piksel.com/v/v80kqp41' ,
48 'md5' : '753ddcd8cc8e4fa2dda4b7be0e77744d' ,
52 'title' : 'WAW- State of Washington vs. Donald J. Trump, et al' ,
53 'description' : 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.' ,
54 'timestamp' : 1486171129 ,
55 'upload_date' : '20170204'
59 # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
60 'url' : 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477' ,
61 'only_matching' : True ,
66 def _extract_url ( webpage
):
68 r
'<iframe[^>]+src=["\' ]( ?P
< url
>( ?
: https?
:) ?
// player\
. piksel\
. com
/ v
/[ a
- z0
- 9 ]+) ',
71 return mobj.group(' url
')
73 def _call_api(self, app_token, resource, display_id, query, fatal=True):
74 response = (self._download_json(
75 ' http
:// player
. piksel
. com
/ ws
/ ws_
%s/ api
/ %s/ mode
/ json
/ apiv
/ 5 ' % (resource, app_token),
76 display_id, query=query, fatal=fatal) or {}).get(' response
')
77 failure = try_get(response, lambda x: x[' failure
'][' reason
'])
80 raise ExtractorError(failure, expected=True)
81 self.report_warning(failure)
84 def _real_extract(self, url):
85 ref_id, display_id = self._match_valid_url(url).groups()
86 webpage = self._download_webpage(url, display_id)
87 app_token = self._search_regex([
88 r' clientAPI\s
*: \s
* "([^" ]+) "',
89 r'data-de-api-key\s*=\s*" ([ ^
"]+)" '
90 ], webpage, ' app token
')
91 query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
92 program = self._call_api(
93 app_token, ' program
', display_id, query)[' WsProgramResponse
'][' program
']
94 video_id = program[' uuid
']
95 video_data = program[' asset
']
96 title = video_data[' title
']
97 asset_type = dict_get(video_data, [' assetType
', ' asset_type
'])
101 def process_asset_file(asset_file):
104 # TODO: extract rtmp formats
105 http_url = asset_file.get(' http_url
')
109 vbr = int_or_none(asset_file.get(' videoBitrate
'), 1024)
110 abr = int_or_none(asset_file.get(' audioBitrate
'), 1024)
111 if asset_type == ' video
':
113 elif asset_type == ' audio
':
117 ' format_id
': join_nonempty(' http
', tbr),
118 ' url
': unescapeHTML(http_url),
121 ' width
': int_or_none(asset_file.get(' videoWidth
')),
122 ' height
': int_or_none(asset_file.get(' videoHeight
')),
123 ' filesize
': int_or_none(asset_file.get(' filesize
')),
127 def process_asset_files(asset_files):
128 for asset_file in (asset_files or []):
129 process_asset_file(asset_file)
131 process_asset_files(video_data.get(' assetFiles
'))
132 process_asset_file(video_data.get(' referenceFile
'))
134 asset_id = video_data.get(' assetid
') or program.get(' assetid
')
136 process_asset_files(try_get(self._call_api(
137 app_token, ' asset_file
', display_id, {
139 }, False), lambda x: x[' WsAssetFileResponse
'][' AssetFiles
']))
141 m3u8_url = dict_get(video_data, [
148 formats.extend(self._extract_m3u8_formats(
149 m3u8_url, video_id, ' mp4
', ' m3u8_native
',
150 m3u8_id=' hls
', fatal=False))
152 smil_url = dict_get(video_data, [' httpSmil
', ' hdSmil
', ' rtmpSmil
'])
154 transform_source = None
155 if ref_id == ' nhkworld
':
156 # TODO: figure out if this is something to be fixed in urljoin,
157 # _parse_smil_formats or keep it here
158 transform_source = lambda x: x.replace(' src
= "/', 'src=" ').replace(' / media
"', '/media/" ')
159 formats.extend(self._extract_smil_formats(
160 re.sub(r' / od
/[ ^
/]+/ ', ' / od
/ http
/ ', smil_url), video_id,
161 transform_source=transform_source, fatal=False))
163 self._sort_formats(formats, (' tbr
', )) # Incomplete resolution information
166 for caption in video_data.get(' captions
', []):
167 caption_url = caption.get(' url
')
169 subtitles.setdefault(caption.get(' locale
', ' en
'), []).append({
175 ' description
': video_data.get(' description
'),
176 ' thumbnail
': video_data.get(' thumbnailUrl
'),
177 ' timestamp
': parse_iso8601(video_data.get(' dateadd
')),
179 ' subtitles
': subtitles,