]> jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/piksel.py
[openload] Fallback video extension to mp4
[yt-dlp.git] / youtube_dl / extractor / piksel.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 ExtractorError,
10 dict_get,
11 int_or_none,
12 unescapeHTML,
13 parse_iso8601,
14 )
15
16
class PikselIE(InfoExtractor):
    """Information extractor for videos served by the Piksel player.

    Handles URLs of the form ``http(s)://player.piksel.com/v/<id>``.
    """

    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'http://player.piksel.com/v/nv60p12f',
        'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
        'info_dict': {
            'id': 'nv60p12f',
            'ext': 'mp4',
            'title': 'فن الحياة - الحلقة 1',
            'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
            'timestamp': 1465231790,
            'upload_date': '20160606',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first Piksel player iframe URL found in ``webpage``.

        The URL may be protocol-relative (``//player.piksel.com/...``).
        Returns ``None`` when no matching ``<iframe>`` is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Extract metadata and formats for the Piksel video at ``url``.

        Downloads the player page to obtain the ``clientAPI`` token, queries
        the ``ws_program`` JSON API with it, and builds the format list from
        the HLS manifest URL (if any) and the asset files' ``http_url``s.

        Raises ExtractorError (expected) when the API response carries a
        ``failure`` entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        app_token = self._search_regex(
            r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
        response = self._download_json(
            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
            video_id, query={
                'v': video_id
            })['response']
        failure = response.get('failure')
        if failure:
            raise ExtractorError(failure['reason'], expected=True)
        video_data = response['WsProgramResponse']['program']['asset']
        title = video_data['title']

        formats = []

        # The manifest URL may be stored under any of several keys.
        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        # ``or []`` also covers the key being present with a null value.
        for asset_file in video_data.get('assetFiles') or []:
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
                continue
            tbr = None
            # NOTE(review): the second argument is presumably a scale divisor
            # for int_or_none -- confirm against ..utils.
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
            if asset_type == 'video':
                # Either bitrate may be missing; adding None would raise
                # TypeError, so only compute a total when both are known.
                if vbr is not None and abr is not None:
                    tbr = vbr + abr
            elif asset_type == 'audio':
                tbr = abr

            format_id = ['http']
            if tbr:
                format_id.append(compat_str(tbr))

            formats.append({
                'format_id': '-'.join(format_id),
                'url': unescapeHTML(http_url),
                'vbr': vbr,
                'abr': abr,
                'width': int_or_none(asset_file.get('videoWidth')),
                'height': int_or_none(asset_file.get('videoHeight')),
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
        }