yt_dlp/extractor/radiobremen.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import parse_duration
   5
   6
   7 class RadioBremenIE(InfoExtractor):
   8     _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
   9     IE_NAME = 'radiobremen'
  10
  11     _TEST = {
  12         'url': 'http://www.radiobremen.de/mediathek/?id=141876',
  13         'info_dict': {
  14             'id': '141876',
  15             'ext': 'mp4',
  16             'duration': 178,
  17             'width': 512,
  18             'title': 'Druck auf Patrick Öztürk',
  19             'thumbnail': r're:https?://.*\.jpg$',
  20             'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
  21         },
  22     }
  23
  24     def _real_extract(self, url):
  25         video_id = self._match_id(url)
  26
  27         meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id
  28         meta_doc = self._download_webpage(
  29             meta_url, video_id, 'Downloading metadata')
  30         title = self._html_search_regex(
  31             r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
  32         description = self._html_search_regex(
  33             r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
  34         duration = parse_duration(self._html_search_regex(
  35             r'L&auml;nge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
  36             meta_doc, 'duration', fatal=False))
  37
  38         page_doc = self._download_webpage(
  39             url, video_id, 'Downloading video information')
  40         mobj = re.search(
  41             r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
  42             page_doc)
  43         video_url = (
  44             "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
  45             (video_id, video_id, mobj.group("secret"), mobj.group('width')))
  46
  47         formats = [{
  48             'url': video_url,
  49             'ext': 'mp4',
  50             'width': int(mobj.group('width')),
  51         }]
  52         return {
  53             'id': video_id,
  54             'title': title,
  55             'description': description,
  56             'duration': duration,
  57             'formats': formats,
  58             'thumbnail': mobj.group('thumbnail'),
  59         }