]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/joqrag.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / joqrag.py
CommitLineData
c305a25c 1import datetime as dt
db8b4edc
M
2import urllib.parse
3
4from .common import InfoExtractor
5from ..utils import (
6 clean_html,
7 datetime_from_str,
8 unified_timestamp,
9 urljoin,
10)
11
12
class JoqrAgIE(InfoExtractor):
    # Extractor for the single "live" stream exposed by the uniqueradio.jp
    # player; every URL matched below resolves to that same stream.
    IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
    _VALID_URL = [
        r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
        r'https?://(?:www\.)?joqr\.co\.jp/ag/',
        r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])',
    ]
    _TESTS = [
        {
            'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
            'info_dict': {
                'id': 'live',
                'title': str,
                'channel': '超!A&G+',
                'description': str,
                'live_status': 'is_live',
                'release_timestamp': int,
            },
            'params': {
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
        },
        {
            'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
            'only_matching': True,
        },
        {
            'url': 'https://www.joqr.co.jp/ag/article/103760/',
            'only_matching': True,
        },
        {
            'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
            'only_matching': True,
        },
        {
            'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
            'only_matching': True,
        },
    ]

    def _extract_metadata(self, variable, html):
        """Return the cleaned value of the ``var <variable> = "..."`` assignment in *html*.

        The quoted value is URL-decoded with ``unquote_plus`` and passed
        through ``clean_html``. ``None`` is returned when the result is empty,
        which includes the case where no assignment matches.
        """
        raw_value = self._search_regex(
            rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, 'metadata', group='value', default='')
        decoded_value = urllib.parse.unquote_plus(raw_value)
        return clean_html(decoded_value) or None

    def _extract_start_timestamp(self, video_id, is_live):
        """Look up a start timestamp from the daily program list.

        Today's program list is consulted first. If the stream is live but the
        time found for today is not yet in the past, yesterday's list is used
        instead. Returns ``None`` when no time can be found for today.
        """
        # The same +9h offset is used as the '+09:00' zone when parsing below.
        utc_shift = dt.timedelta(hours=9)

        def lookup_start(day):
            # Returns the parsed timestamp for *day* ('today'/'yesterday'),
            # or None when the page is unavailable or has no matching header.
            shifted = datetime_from_str(day) + utc_shift
            date_param = shifted.strftime('%Y%m%d')
            # Best-effort download: a failure yields an empty page, not an error.
            program_list = self._download_webpage(
                f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date_param}', video_id,
                note=f'Downloading program list of {date_param}', fatal=False,
                errnote=f'Failed to download program list of {date_param}') or ''
            # Captures the time that follows the dash in the first matching header.
            clock = self._search_regex(
                r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
                program_list, 'start time', default=None)
            if not clock:
                return None
            return unified_timestamp(f'{shifted.strftime("%Y/%m/%d")} {clock} +09:00')

        todays_start = lookup_start('today')
        if not todays_start:
            return None

        # Only a live stream whose "today" time has not passed yet falls back
        # to the previous day's list.
        if is_live and todays_start >= datetime_from_str('now').timestamp():
            return lookup_start('yesterday')
        return todays_start

    def _real_extract(self, url):
        """Build the info dict for the stream.

        When the metadata has no program name, or the name is '放送休止', no
        formats are produced and the entry is reported as upcoming via
        ``raise_no_formats``. Otherwise the HLS formats are extracted from the
        player page and the entry is reported as live.
        """
        video_id = 'live'

        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
        title = self._extract_metadata('Program_name', metadata)

        on_air = bool(title) and title != '放送休止'
        if on_air:
            player_page = self._download_webpage(
                'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
                note='Downloading player data', errnote='Failed to download player data')
            m3u8_path = self._search_regex(
                r'<source\s[^>]*\bsrc="([^"]+)"', player_page, 'm3u8 url')
            m3u8_url = urljoin('https://www.uniqueradio.jp/', m3u8_path)
            formats = self._extract_m3u8_formats(m3u8_url, video_id)
            live_status = 'is_live'
            release_timestamp = self._extract_start_timestamp(video_id, True)
        else:
            formats = []
            live_status = 'is_upcoming'
            release_timestamp = self._extract_start_timestamp(video_id, False)
            msg = 'This stream is not currently live'
            if release_timestamp:
                starts_at = dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S')
                msg = f'{msg} and will start at {starts_at}'
            self.raise_no_formats(msg, expected=True)

        description = self._extract_metadata('Program_text', metadata)
        return {
            'id': video_id,
            'title': title,
            'channel': '超!A&G+',
            'description': description,
            'formats': formats,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
        }