import json
import re

from ..utils import int_or_none, unescapeHTML, unified_timestamp
from .common import InfoExtractor


class HRFernsehenIE(InfoExtractor):
    """Extractor for video pages on hr-fernsehen.de and hessenschau.de.

    The page embeds a JSON document in a ``data-new-hr-mediaplayer-loader``
    HTML attribute; formats, air date, subtitles and thumbnails are all read
    from that document.
    """

    IE_NAME = 'hrfernsehen'
    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

    # NOTE(review): the 'info_dict' wrapper, its 'id'/'ext' entries and the
    # 'only_matching' flag of the second case are restored from context (the
    # id is the six digits in the URL); confirm against the upstream file.
    _TESTS = [{
        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
        'md5': '5c4e0ba94677c516a2f65a84110fc536',
        'info_dict': {
            'id': '130546',
            'ext': 'mp4',
            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
            'subtitles': {'de': [{
                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
            }]},
            'timestamp': 1598470200,
            'upload_date': '20200826',
            'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
            'title': 'hessenschau vom 26.08.2020'
        }
    }, {
        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
        'only_matching': True
    }]

    _GEO_COUNTRIES = ['DE']

    def extract_airdate(self, loader_data):
        """Return the air date found in the loader JSON as a timestamp.

        Reads ``loader_data['mediaMetadata']['agf']['airdate']`` and passes it
        to ``unified_timestamp``. Returns ``None`` when any level of that path
        is missing or null.
        """
        # ``or {}`` also covers keys that are present but set to JSON null,
        # which ``.get(key, {})`` alone would not.
        media_metadata = loader_data.get('mediaMetadata') or {}
        agf = media_metadata.get('agf') or {}
        airdate_str = agf.get('airdate')

        if airdate_str is None:
            return None

        return unified_timestamp(airdate_str)

    def extract_formats(self, loader_data):
        """Build the format list from ``loader_data['videoResolutionLevels']``.

        Each entry contributes its ``url`` and ``verticalResolution``. When the
        URL carries a ``<width>x<height>-<fps>p-<tbr>kbit`` marker, the values
        parsed from it are added and the parsed height replaces the declared
        one. The list is passed through ``self._sort_formats`` before being
        returned.
        """
        stream_formats = []
        for stream_obj in loader_data["videoResolutionLevels"]:
            stream_format = {
                'format_id': str(stream_obj['verticalResolution']) + "p",
                'height': stream_obj['verticalResolution'],
                'url': stream_obj['url'],
            }

            # NOTE(review): the search subject is taken to be the stream URL
            # (the sample .vtt URL in _TESTS carries the same marker) - confirm.
            quality_information = re.search(
                r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
                stream_obj['url'])
            if quality_information:
                stream_format['width'] = int_or_none(quality_information.group(1))
                stream_format['height'] = int_or_none(quality_information.group(2))
                stream_format['fps'] = int_or_none(quality_information.group(3))
                stream_format['tbr'] = int_or_none(quality_information.group(4))

            stream_formats.append(stream_format)

        self._sort_formats(stream_formats)
        return stream_formats

    def _real_extract(self, url):
        """Download the page at *url* and return the info dict for its video.

        Title and description come from the page's meta tags; everything else
        is taken from the JSON in the ``data-new-hr-mediaplayer-loader``
        attribute.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            ['og:title', 'twitter:title', 'name'], webpage)
        description = self._html_search_meta(
            ['description'], webpage)

        # The attribute value is HTML-escaped JSON, so unescape before parsing.
        loader_str = unescapeHTML(self._search_regex(
            r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        loader_data = json.loads(loader_str)

        # NOTE(review): the 'id' and 'title' entries are restored from the
        # locals computed above - confirm against the upstream file.
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': self.extract_formats(loader_data),
            'timestamp': self.extract_airdate(loader_data)
        }

        # Skip a "subtitle" key that is present but null or empty, so no
        # subtitle entry with a None URL is emitted.
        subtitle_url = loader_data.get("subtitle")
        if subtitle_url:
            info["subtitles"] = {"de": [{"url": subtitle_url}]}

        # De-duplicate preview images while keeping their order stable (a set
        # would make the order vary between runs) and drop null entries.
        thumbnails = []
        for thumbnail_url in (loader_data.get("previewImageUrl") or {}).values():
            if thumbnail_url and thumbnail_url not in thumbnails:
                thumbnails.append(thumbnail_url)
        if thumbnails:
            info["thumbnails"] = [{"url": t} for t in thumbnails]

        return info