]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/boxcast.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / boxcast.py
1 from .common import InfoExtractor
2 from ..utils import (
3 js_to_json,
4 traverse_obj,
5 unified_timestamp
6 )
7
8
class BoxCastVideoIE(InfoExtractor):
    """Extractor for single BoxCast broadcasts hosted on boxcast.tv.

    Handles three URL shapes (see ``_VALID_URL``): ``view-embed/<id>``,
    ``channel/<channel>?b=<id>`` and ``video-portal/<a>/<b>/<id>``.
    """

    _VALID_URL = r'''(?x)
        https?://boxcast\.tv/(?:
            view-embed/|
            channel/\w+\?(?:[^#]+&)?b=|
            video-portal/(?:\w+/){2}
        )(?P<id>[\w-]+)'''
    # Used to discover BoxCast embeds (iframes) on third-party pages.
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
    _TESTS = [{
        'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        }
    }, {
        'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
        'info_dict': {
            'id': 'otbpltj2kzkveo2qz3ad',
            'ext': 'mp4',
            'uploader_id': 'vctwevwntun3o0ikq7af',
            'uploader': 'Legacy Christian Church',
            'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
            # Dots in the host are escaped so they only match literal dots,
            # consistent with the other thumbnail patterns in this class.
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w-]+/){3}.+\.jpg'
        }
    }, {
        'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
        'info_dict': {
            'id': 'ssihlw5gvfij2by8tkev',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w-]+/){3}.+\.jpg$',
            'release_date': '20230101',
            'uploader_id': 'ds25vaazhlu4ygcvffid',
            'release_timestamp': 1672543201,
            'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
            'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
            'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
        }
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://childrenshealthdefense.eu/live-stream/',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the broadcast referenced by ``url``.

        Broadcast and view metadata are taken from the ``BOXCAST_PRELOAD``
        object embedded in the page when present, otherwise from the
        api.boxcast.com endpoints. Formats are only extracted when the view
        reports the status ``recorded`` and provides a playlist URL.

        Returns a dict with ``id``, ``title``, ``description``, ``thumbnail``,
        ``formats``, ``subtitles``, ``release_timestamp``, ``uploader`` and
        ``uploader_id``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # default={} makes a missing/unparsable preload object non-fatal;
        # the API fallbacks below then take over.
        webpage_json_data = self._search_json(
            r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
            transform_source=js_to_json, default={})

        # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
        broadcast_json_data = (
            traverse_obj(webpage_json_data, ('broadcast', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
        # The view request is best-effort (fatal=False); fall back to an empty
        # dict so the lookups below are always safe.
        view_json_data = (
            traverse_obj(webpage_json_data, ('view', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
                                   display_id, fatal=False) or {})

        formats, subtitles = [], {}
        # Read the playlist defensively: a "recorded" view without a playlist
        # should leave `formats` empty rather than abort with a raw KeyError.
        playlist_url = view_json_data.get('playlist')
        if view_json_data.get('status') == 'recorded' and playlist_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                playlist_url, display_id)

        return {
            'id': str(broadcast_json_data['id']),
            'title': (broadcast_json_data.get('name')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
            'description': (broadcast_json_data.get('description')
                            or self._html_search_meta(['og:description', 'twitter:description'], webpage)
                            or None),
            'thumbnail': (broadcast_json_data.get('preview')
                          or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
            'uploader': broadcast_json_data.get('account_name'),
            'uploader_id': broadcast_json_data.get('account_id'),
        }