youtube_dl/extractor/rts.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     parse_duration,
  10     parse_iso8601,
  11     unescapeHTML,
  12 )
  13
  14
  15 class RTSIE(InfoExtractor):
  16     IE_DESC = 'RTS.ch'
  17     _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
  18
  19     _TEST = {
  20         'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
  21         'md5': '753b877968ad8afaeddccc374d4256a5',
  22         'info_dict': {
  23             'id': '3449373',
  24             'ext': 'mp4',
  25             'duration': 1488,
  26             'title': 'Les Enfants Terribles',
  27             'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
  28             'uploader': 'Divers',
  29             'upload_date': '19680921',
  30             'timestamp': -40280400,
  31         },
  32     }
  33
  34     def _real_extract(self, url):
  35         m = re.match(self._VALID_URL, url)
  36         video_id = m.group('id')
  37
  38         all_info = self._download_json(
  39             'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
  40         info = all_info['video']['JSONinfo']
  41
  42         upload_timestamp = parse_iso8601(info.get('broadcast_date'))
  43         duration = parse_duration(info.get('duration'))
  44         thumbnail = unescapeHTML(info.get('preview_image_url'))
  45         formats = [{
  46             'format_id': fid,
  47             'url': furl,
  48             'tbr': int_or_none(self._search_regex(
  49                 r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
  50         } for fid, furl in info['streams'].items()]
  51         self._sort_formats(formats)
  52
  53         return {
  54             'id': video_id,
  55             'formats': formats,
  56             'title': info['title'],
  57             'description': info.get('intro'),
  58             'duration': duration,
  59             'uploader': info.get('programName'),
  60             'timestamp': upload_timestamp,
  61         }