[yt-dlp.git] / yt_dlp / extractor / arnes.py

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    float_or_none,
    format_field,
    int_or_none,
    parse_iso8601,
    remove_start,
)


class ArnesIE(InfoExtractor):
    """Extractor for videos hosted on video.arnes.si (Arnes Video).

    Metadata and media URLs are read from the site's public JSON endpoint
    ``/api/public/video/<id>``; watch, embed and API URLs are all accepted
    and resolved through the 12-character video id.
    """

    IE_NAME = 'video.arnes.si'
    IE_DESC = 'Arnes Video'
    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
    _TESTS = [{
        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
        'info_dict': {
            'id': 'a1qrWTOQfVoU',
            'ext': 'mp4',
            'title': 'Linearna neodvisnost, definicija',
            'description': 'Linearna neodvisnost, definicija',
            'license': 'PRIVATE',
            'creator': 'Polona Oblak',
            'timestamp': 1585063725,
            'upload_date': '20200324',
            'channel': 'Polona Oblak',
            'channel_id': 'q6pc04hw24cj',
            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
            'duration': 596.75,
            'view_count': int,
            'tags': ['linearna_algebra'],
            'start_time': 10,
        }
    }, {
        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
        'only_matching': True,
    }]
    # Media, thumbnail and channel paths from the API are appended to this
    # origin to form absolute URLs.
    _BASE_URL = 'https://video.arnes.si'

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads ``/api/public/video/<id>`` and maps its ``data`` object
        onto the extractor result. ``title`` is mandatory (a missing key
        raises ``KeyError``); every other field is optional and yields
        ``None`` when the API omits it.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
        title = video['title']

        formats = []
        for media in (video.get('media') or []):
            media_url = media.get('url')
            # Entries without a URL cannot be downloaded; skip them.
            if not media_url:
                continue
            formats.append({
                'url': self._BASE_URL + media_url,
                'format_id': remove_start(media.get('format'), 'FORMAT_'),
                'format_note': media.get('formatTranslation'),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })

        channel = video.get('channel') or {}
        channel_id = channel.get('url')
        thumbnail = video.get('thumbnailUrl')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # The thumbnail is optional: only prefix the origin when the API
            # supplied a path, so a missing value cannot raise TypeError.
            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
            'description': video.get('description'),
            'license': video.get('license'),
            'creator': video.get('author'),
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
            # The raw value is divided by 1000 (presumably milliseconds ->
            # seconds; the test above expects 596.75).
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),
            # An optional ``t`` query parameter on the input URL gives the
            # start offset.
            'start_time': int_or_none(compat_parse_qs(
                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
        }
Commit	Line	Data
39ed931e	1	from .common import InfoExtractor
	2	from ..compat import (
	3	compat_parse_qs,
	4	compat_urllib_parse_urlparse,
	5	)
	6	from ..utils import (
	7	float_or_none,
e897bd82	8	format_field,
39ed931e	9	int_or_none,
	10	parse_iso8601,
	11	remove_start,
	12	)
	13
	14
	15	class ArnesIE(InfoExtractor):
	16	IE_NAME = 'video.arnes.si'
	17	IE_DESC = 'Arnes Video'
	18	_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch\|embed\|api/(?:asset\|public/video))/(?P<id>[0-9a-zA-Z]{12})'
	19	_TESTS = [{
	20	'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
	21	'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
	22	'info_dict': {
	23	'id': 'a1qrWTOQfVoU',
	24	'ext': 'mp4',
	25	'title': 'Linearna neodvisnost, definicija',
	26	'description': 'Linearna neodvisnost, definicija',
	27	'license': 'PRIVATE',
	28	'creator': 'Polona Oblak',
	29	'timestamp': 1585063725,
	30	'upload_date': '20200324',
	31	'channel': 'Polona Oblak',
	32	'channel_id': 'q6pc04hw24cj',
	33	'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
	34	'duration': 596.75,
	35	'view_count': int,
	36	'tags': ['linearna_algebra'],
	37	'start_time': 10,
	38	}
	39	}, {
	40	'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
	41	'only_matching': True,
	42	}, {
	43	'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
	44	'only_matching': True,
	45	}, {
	46	'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
	47	'only_matching': True,
	48	}, {
	49	'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
	50	'only_matching': True,
	51	}, {
	52	'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
	53	'only_matching': True,
	54	}]
	55	_BASE_URL = 'https://video.arnes.si'
	56
	57	def _real_extract(self, url):
	58	video_id = self._match_id(url)
	59
	60	video = self._download_json(
	61	self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
	62	title = video['title']
	63
	64	formats = []
	65	for media in (video.get('media') or []):
	66	media_url = media.get('url')
	67	if not media_url:
	68	continue
	69	formats.append({
	70	'url': self._BASE_URL + media_url,
	71	'format_id': remove_start(media.get('format'), 'FORMAT_'),
	72	'format_note': media.get('formatTranslation'),
73	'width': int_or_none(media.get('width')),
74	'height': int_or_none(media.get('height')),
75	})
39ed931e	76
	77	channel = video.get('channel') or {}
	78	channel_id = channel.get('url')
	79	thumbnail = video.get('thumbnailUrl')
	80
	81	return {
	82	'id': video_id,
	83	'title': title,
	84	'formats': formats,
	85	'thumbnail': self._BASE_URL + thumbnail,
	86	'description': video.get('description'),
	87	'license': video.get('license'),
	88	'creator': video.get('author'),
	89	'timestamp': parse_iso8601(video.get('creationTime')),
	90	'channel': channel.get('name'),
	91	'channel_id': channel_id,
a70635b8	92	'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
39ed931e	93	'duration': float_or_none(video.get('duration'), 1000),
	94	'view_count': int_or_none(video.get('views')),
	95	'tags': video.get('hashtags'),
	96	'start_time': int_or_none(compat_parse_qs(
	97	compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
	98	}