yt_dlp/extractor/arnes.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import (
   6     compat_parse_qs,
   7     compat_urllib_parse_urlparse,
   8 )
   9 from ..utils import (
  10     format_field,
  11     float_or_none,
  12     int_or_none,
  13     parse_iso8601,
  14     remove_start,
  15 )
  16
  17
  18 class ArnesIE(InfoExtractor):
  19     IE_NAME = 'video.arnes.si'
  20     IE_DESC = 'Arnes Video'
  21     _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
  22     _TESTS = [{
  23         'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
  24         'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
  25         'info_dict': {
  26             'id': 'a1qrWTOQfVoU',
  27             'ext': 'mp4',
  28             'title': 'Linearna neodvisnost, definicija',
  29             'description': 'Linearna neodvisnost, definicija',
  30             'license': 'PRIVATE',
  31             'creator': 'Polona Oblak',
  32             'timestamp': 1585063725,
  33             'upload_date': '20200324',
  34             'channel': 'Polona Oblak',
  35             'channel_id': 'q6pc04hw24cj',
  36             'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
  37             'duration': 596.75,
  38             'view_count': int,
  39             'tags': ['linearna_algebra'],
  40             'start_time': 10,
  41         }
  42     }, {
  43         'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
  44         'only_matching': True,
  45     }, {
  46         'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
  47         'only_matching': True,
  48     }, {
  49         'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
  50         'only_matching': True,
  51     }, {
  52         'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
  53         'only_matching': True,
  54     }, {
  55         'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
  56         'only_matching': True,
  57     }]
  58     _BASE_URL = 'https://video.arnes.si'
  59
  60     def _real_extract(self, url):
  61         video_id = self._match_id(url)
  62
  63         video = self._download_json(
  64             self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
  65         title = video['title']
  66
  67         formats = []
  68         for media in (video.get('media') or []):
  69             media_url = media.get('url')
  70             if not media_url:
  71                 continue
  72             formats.append({
  73                 'url': self._BASE_URL + media_url,
  74                 'format_id': remove_start(media.get('format'), 'FORMAT_'),
  75                 'format_note': media.get('formatTranslation'),
  76                 'width': int_or_none(media.get('width')),
  77                 'height': int_or_none(media.get('height')),
  78             })
  79         self._sort_formats(formats)
  80
  81         channel = video.get('channel') or {}
  82         channel_id = channel.get('url')
  83         thumbnail = video.get('thumbnailUrl')
  84
  85         return {
  86             'id': video_id,
  87             'title': title,
  88             'formats': formats,
  89             'thumbnail': self._BASE_URL + thumbnail,
  90             'description': video.get('description'),
  91             'license': video.get('license'),
  92             'creator': video.get('author'),
  93             'timestamp': parse_iso8601(video.get('creationTime')),
  94             'channel': channel.get('name'),
  95             'channel_id': channel_id,
  96             'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
  97             'duration': float_or_none(video.get('duration'), 1000),
  98             'view_count': int_or_none(video.get('views')),
  99             'tags': video.get('hashtags'),
 100             'start_time': int_or_none(compat_parse_qs(
 101                 compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
 102         }