]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/fktv.py
[youtube] Add test with '};' in tags
[yt-dlp.git] / youtube_dl / extractor / fktv.py
CommitLineData
bf7aa630
PH
1from __future__ import unicode_literals
2
c5e743f6 3import re
71c107fc 4
5from .common import InfoExtractor
6from ..utils import (
c5e743f6 7 clean_html,
7b4137c3 8 determine_ext,
8ddf48d5 9 ExtractorError,
71c107fc 10)
11
c5e743f6 12
class FKTVIE(InfoExtractor):
    """Information extractor for fernsehkritik.tv episode pages.

    Matches URLs of the form ``http://fernsehkritik.tv/folge-<number>``
    (optionally with ``www.`` and a trailing path) and uses the episode
    number as the video id.
    """

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            # Raw string: '\.' is not a valid escape sequence in a normal
            # string literal and triggers a warning on modern Python.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Extract title, formats and thumbnail for one episode.

        Downloads the episode's ``/play`` page, reads the title from the
        first ``<h3>`` element and collects every ``<source src="...">``
        found inside the ``<video>`` element as a format.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail`` (``thumbnail`` is None when the ``<video>`` tag has
        no ``poster`` attribute).

        Raises ExtractorError when no ``<video>...</video>`` element is
        found in the page.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(
            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))

        # Group 1: optional poster attribute of the <video> tag.
        # Group 2: everything between <video ...> and </video>, which is
        # expected to hold the <source> elements.
        matches = re.search(
            r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
            webpage)
        if matches is None:
            raise ExtractorError('Unable to extract the video')

        poster, sources = matches.groups()
        if poster is None:
            # A missing thumbnail is not fatal; warn and carry on.
            self.report_warning('unable to extract thumbnail')

        urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
        # One format per <source>; the file extension doubles as format id.
        formats = [{
            'url': furl,
            'format_id': determine_ext(furl),
        } for furl in urls]

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': poster,
        }