yt_dlp/extractor/kanalplay.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8     float_or_none,
   9     srt_subtitles_timecode,
  10 )
  11
  12
  13 class KanalPlayIE(InfoExtractor):
  14     IE_DESC = 'Kanal 5/9/11 Play'
  15     _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
  16     _TESTS = [{
  17         'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
  18         'info_dict': {
  19             'id': '3270012277',
  20             'ext': 'flv',
  21             'title': 'Saknar både dusch och avlopp',
  22             'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
  23             'duration': 2636.36,
  24         },
  25         'params': {
  26             # rtmp download
  27             'skip_download': True,
  28         }
  29     }, {
  30         'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
  31         'only_matching': True,
  32     }, {
  33         'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
  34         'only_matching': True,
  35     }]
  36
  37     def _fix_subtitles(self, subs):
  38         return '\r\n\r\n'.join(
  39             '%s\r\n%s --> %s\r\n%s'
  40             % (
  41                 num,
  42                 srt_subtitles_timecode(item['startMillis'] / 1000.0),
  43                 srt_subtitles_timecode(item['endMillis'] / 1000.0),
  44                 item['text'],
  45             ) for num, item in enumerate(subs, 1))
  46
  47     def _get_subtitles(self, channel_id, video_id):
  48         subs = self._download_json(
  49             'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
  50             video_id, 'Downloading subtitles JSON', fatal=False)
  51         return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
  52
  53     def _real_extract(self, url):
  54         mobj = self._match_valid_url(url)
  55         video_id = mobj.group('id')
  56         channel_id = mobj.group('channel_id')
  57
  58         video = self._download_json(
  59             'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
  60             video_id)
  61
  62         reasons_for_no_streams = video.get('reasonsForNoStreams')
  63         if reasons_for_no_streams:
  64             raise ExtractorError(
  65                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
  66                 expected=True)
  67
  68         title = video['title']
  69         description = video.get('description')
  70         duration = float_or_none(video.get('length'), 1000)
  71         thumbnail = video.get('posterUrl')
  72
  73         stream_base_url = video['streamBaseUrl']
  74
  75         formats = [{
  76             'url': stream_base_url,
  77             'play_path': stream['source'],
  78             'ext': 'flv',
  79             'tbr': float_or_none(stream.get('bitrate'), 1000),
  80             'rtmp_real_time': True,
  81         } for stream in video['streams']]
  82         self._sort_formats(formats)
  83
  84         subtitles = {}
  85         if video.get('hasSubtitle'):
  86             subtitles = self.extract_subtitles(channel_id, video_id)
  87
  88         return {
  89             'id': video_id,
  90             'title': title,
  91             'description': description,
  92             'thumbnail': thumbnail,
  93             'duration': duration,
  94             'formats': formats,
  95             'subtitles': subtitles,
  96         }