jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/rts.py
[RTS] Add extractor (Fixes #2608)
[yt-dlp.git] / youtube_dl / extractor / rts.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 int_or_none,
9 parse_duration,
10 parse_iso8601,
11 unescapeHTML,
12 )
13
14
class RTSIE(InfoExtractor):
    """Information extractor for archive TV pages on rts.ch.

    Matches URLs of the form
    ``http(s)://[www.]rts.ch/archives/tv/<section>/<numeric id>-<slug>.html``
    and builds the info dict from the JSON article document requested for
    that numeric id.
    """

    IE_DESC = 'RTS.ch'
    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'

    _TEST = {
        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
        'md5': '753b877968ad8afaeddccc374d4256a5',
        'info_dict': {
            'id': '3449373',
            'ext': 'mp4',
            'duration': 1488,
            'title': 'Les Enfants Terribles',
            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
            'uploader': 'Divers',
            'upload_date': '19680921',
            'timestamp': -40280400,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single rts.ch archive video page.

        ``url`` is expected to match ``_VALID_URL``; the numeric id captured
        from it is used both as the video id and to build the JSON article
        request.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``duration``, ``uploader``, ``timestamp`` and
        ``thumbnail``.  ``title`` and the ``streams`` mapping are required
        in the JSON (a missing key raises ``KeyError``); the remaining
        fields are read with ``.get`` and may be absent.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        all_info = self._download_json(
            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
        info = all_info['video']['JSONinfo']

        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
        duration = parse_duration(info.get('duration'))

        # Only unescape when a preview URL is actually present, so a missing
        # key yields ``thumbnail = None`` instead of handing None to the
        # helper (whose None handling is not visible from this file).
        preview_url = info.get('preview_image_url')
        thumbnail = unescapeHTML(preview_url) if preview_url is not None else None

        # One format per entry of the 'streams' mapping; the bitrate is taken
        # from a '-<digits>k.' fragment of the stream URL when there is one,
        # otherwise 'tbr' is left as None.
        formats = [{
            'format_id': fid,
            'url': furl,
            'tbr': int_or_none(self._search_regex(
                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
        } for fid, furl in info['streams'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': info['title'],
            'description': info.get('intro'),
            'duration': duration,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
            # Previously computed but never returned, so the preview image
            # was silently dropped from the result.
            'thumbnail': thumbnail,
        }