]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/fktv.py
Fix "invalid escape sequences" error on Python 3.6
[yt-dlp.git] / youtube_dl / extractor / fktv.py
CommitLineData
bf7aa630
PH
1from __future__ import unicode_literals
2
71c107fc 3from .common import InfoExtractor
4from ..utils import (
c5e743f6 5 clean_html,
7b4137c3 6 determine_ext,
e0f06eae 7 js_to_json,
71c107fc 8)
9
c5e743f6 10
class FKTVIE(InfoExtractor):
    """Extractor for fernsehkritik.tv episode pages (``/folge-<number>``)."""

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by *url*.

        The episode number is taken from the URL, the episode's ``/play``
        page is downloaded, and the title, poster image and media source
        list are scraped out of that page.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.
        """
        episode = self._match_id(url)

        play_page_url = 'http://fernsehkritik.tv/folge-%s/play' % episode
        webpage = self._download_webpage(play_page_url, episode)

        # The episode title is the text of the first <h3> element.
        raw_title = self._html_search_regex(
            '<h3>([^<]+)</h3>', webpage, 'title')
        title = clean_html(raw_title)

        # The poster is optional: fatal=False is passed for this lookup.
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)

        # MEDIA is a JavaScript array literal embedded in the page; it is
        # passed through js_to_json before being parsed.
        media_literal = self._search_regex(
            r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media')
        sources = self._parse_json(media_literal, episode, js_to_json)

        # One format per source entry that carries a non-empty 'src'; the
        # URL's file extension is used as the format id.
        formats = [
            {
                'url': media_url,
                'format_id': determine_ext(media_url),
            }
            for media_url in (entry.get('src') for entry in sources)
            if media_url
        ]
        self._sort_formats(formats)

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }