[yt-dlp.git] / yt_dlp / extractor / blinkx.py

import json

from .common import InfoExtractor
from ..utils import (
    remove_start,
    int_or_none,
)


class BlinkxIE(InfoExtractor):
    """Extractor for blinkx.com videos.

    Accepts either a blinkx.com ``/ce/<id>`` page URL or the internal
    ``blinkx:<id>`` form.  The first eight characters of the matched id are
    used as the short id for the returned info dict.
    """

    # "www." is optional so that bare-domain links are recognised as well.
    _VALID_URL = r'(?:https?://(?:www\.)?blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
    IE_NAME = 'blinkx'

    _TEST = {
        'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
        'md5': '337cf7a344663ec79bf93a526a2e06c7',
        'info_dict': {
            'id': 'Da0Gw3xc',
            'ext': 'mp4',
            'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
            'uploader': 'IGN News',
            'upload_date': '20150217',
            'timestamp': 1424215740,
            'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
            'duration': 47.743333,
        },
    }

    @staticmethod
    def _build_format(media):
        """Translate one 'flv'/'mp4' media entry into a format dict.

        ``media`` is a single item of the API's ``media`` list; the caller
        has already verified that it carries a non-empty ``link``.
        """
        vcodec = media.get('vcodec')
        if vcodec:
            vcodec = remove_start(vcodec, 'ff')
        acodec = media.get('acodec')
        if acodec:
            acodec = remove_start(acodec, 'ff')

        vbr = int_or_none(media.get('vbr') or media.get('vbitrate'), 1000)
        abr = int_or_none(media.get('abr') or media.get('abitrate'), 1000)
        tbr = vbr + abr if vbr and abr else None

        # Only values that are actually known go into the id, so it never
        # contains a literal "None" when the API omits a field.
        id_parts = (vcodec, '%sk' % tbr if tbr else None, media.get('w'))
        format_id = '-'.join(
            '%s' % part for part in id_parts if part not in (None, ''))

        return {
            'format_id': format_id or None,
            'url': media['link'],
            'vcodec': vcodec,
            'acodec': acodec,
            'abr': abr,
            'vbr': vbr,
            'tbr': tbr,
            'width': int_or_none(media.get('w')),
            'height': int_or_none(media.get('h')),
        }

    def _real_extract(self, url):
        """Query the play_video API for ``url`` and build the info dict.

        Returns a ``url_result`` pointing at the Youtube extractor as soon as
        a 'youtube' media entry is encountered; otherwise returns an info
        dict with the collected formats, thumbnails and metadata.
        """
        video_id = self._match_id(url)
        display_id = video_id[:8]

        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
                   + 'video=%s' % video_id)
        data_json = self._download_webpage(api_url, display_id)
        data = json.loads(data_json)['api']['results'][0]

        duration = None
        thumbnails = []
        formats = []
        for m in data.get('media') or []:
            media_type = m.get('type')

            if media_type == 'original':
                # A missing or malformed duration must not abort extraction.
                try:
                    duration = float(m.get('d'))
                except (TypeError, ValueError):
                    pass
                continue

            # Every remaining entry type is useless without a link.
            link = m.get('link')
            if not link:
                continue

            if media_type == 'jpg':
                thumbnails.append({
                    'url': link,
                    'width': int_or_none(m.get('w')),
                    'height': int_or_none(m.get('h')),
                })
            elif media_type == 'youtube':
                self.to_screen('Youtube video detected: %s' % link)
                return self.url_result(link, 'Youtube', video_id=link)
            elif media_type in ('flv', 'mp4'):
                formats.append(self._build_format(m))

        self._sort_formats(formats)

        return {
            'id': display_id,
            'fullid': video_id,
            'title': data['title'],
            'formats': formats,
            'uploader': data.get('channel_name'),
            'timestamp': int_or_none(data.get('pubdate_epoch')),
            'description': data.get('description'),
            'thumbnails': thumbnails,
            'duration': duration,
        }
Commit	Line	Data
d7dda168	1	import json
d7dda168 PH	2
d7dda168 PH	3	from .common import InfoExtractor
dd0a58f5 S	4	from ..utils import (
	5	remove_start,
	6	int_or_none,
	7	)
d7dda168 PH	8
	9
	10	class BlinkxIE(InfoExtractor):
dd0a58f5	11	_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/\|blinkx:)(?P<id>[^?]+)'
0dc13f4c	12	IE_NAME = 'blinkx'
d7dda168 PH	13
d7dda168 PH	14	_TEST = {
dd0a58f5 S	15	'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
dd0a58f5 S	16	'md5': '337cf7a344663ec79bf93a526a2e06c7',
f577e0ce	17	'info_dict': {
dd0a58f5	18	'id': 'Da0Gw3xc',
8f93030c	19	'ext': 'mp4',
dd0a58f5 S	20	'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
	21	'uploader': 'IGN News',
	22	'upload_date': '20150217',
	23	'timestamp': 1424215740,
	24	'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
	25	'duration': 47.743333,
d7dda168 PH	26	},
	27	}
	28
dd0a58f5 S	29	def _real_extract(self, url):
dd0a58f5 S	30	video_id = self._match_id(url)
d7dda168 PH	31	display_id = video_id[:8]
d7dda168 PH	32
3089bc74 S	33	api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
3089bc74 S	34	+ 'video=%s' % video_id)
d7dda168 PH	35	data_json = self._download_webpage(api_url, display_id)
d7dda168 PH	36	data = json.loads(data_json)['api']['results'][0]
d7dda168 PH	37	duration = None
	38	thumbnails = []
	39	formats = []
	40	for m in data['media']:
	41	if m['type'] == 'jpg':
	42	thumbnails.append({
	43	'url': m['link'],
	44	'width': int(m['w']),
	45	'height': int(m['h']),
	46	})
	47	elif m['type'] == 'original':
8d2cc6fb	48	duration = float(m['d'])
768df745 JMF	49	elif m['type'] == 'youtube':
768df745 JMF	50	yt_id = m['link']
8f93030c	51	self.to_screen('Youtube video detected: %s' % yt_id)
768df745	52	return self.url_result(yt_id, 'Youtube', video_id=yt_id)
d7dda168 PH	53	elif m['type'] in ('flv', 'mp4'):
	54	vcodec = remove_start(m['vcodec'], 'ff')
	55	acodec = remove_start(m['acodec'], 'ff')
dd0a58f5 S	56	vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
	57	abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
	58	tbr = vbr + abr if vbr and abr else None
8f93030c	59	format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
d7dda168 PH	60	formats.append({
	61	'format_id': format_id,
	62	'url': m['link'],
	63	'vcodec': vcodec,
	64	'acodec': acodec,
dd0a58f5 S	65	'abr': abr,
dd0a58f5 S	66	'vbr': vbr,
4bc60daf	67	'tbr': tbr,
dd0a58f5 S	68	'width': int_or_none(m.get('w')),
dd0a58f5 S	69	'height': int_or_none(m.get('h')),
d7dda168	70	})
4bc60daf PH	71
4bc60daf PH	72	self._sort_formats(formats)
d7dda168 PH	73
	74	return {
	75	'id': display_id,
	76	'fullid': video_id,
	77	'title': data['title'],
	78	'formats': formats,
06425e96	79	'uploader': data.get('channel_name'),
06425e96	80	'timestamp': data.get('pubdate_epoch'),
d7dda168 PH	81	'description': data.get('description'),
	82	'thumbnails': thumbnails,
	83	'duration': duration,
	84	}