jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/abcotvs.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / abcotvs.py
1 from .common import InfoExtractor
2 from ..utils import (
3 dict_get,
4 int_or_none,
5 try_get,
6 )
7
8
class ABCOTVSIE(InfoExtractor):
    # Extractor for story pages on the ABC Owned Television Stations sites
    # matched by _VALID_URL below.
    IE_NAME = 'abcotvs'
    IE_DESC = 'ABC Owned Television Stations'
    _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
            'info_dict': {
                'id': '472548',
                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
                'ext': 'mp4',
                'title': 'East Bay museum celebrates synthesized music',
                'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1421118520,
                'upload_date': '20150113',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://abc7news.com/472581',
            'only_matching': True,
        },
        {
            'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
            'only_matching': True,
        },
    ]
    # Site name captured from the URL host -> station code sent to the content API.
    _SITE_MAP = {
        '6abc': 'wpvi',
        'abc11': 'wtvd',
        'abc13': 'ktrk',
        'abc30': 'kfsn',
        'abc7': 'kabc',
        'abc7chicago': 'wls',
        'abc7news': 'kgo',
        'abc7ny': 'wabc',
    }

    def _real_extract(self, url):
        # Pull the URL components by group name; the slug is optional in
        # _VALID_URL, so the numeric id doubles as the display id when absent.
        mobj = self._match_valid_url(url)
        site = mobj.group('site')
        video_id = mobj.group('id')
        display_id = mobj.group('display_id') or video_id
        station = self._SITE_MAP[site]

        api_query = {
            'id': video_id,
            'key': f'otv.web.{station}.story',
            'station': station,
        }
        response = self._download_json(
            'https://api.abcotvs.com/v2/content', display_id, query=api_query)
        data = response['data']

        # Use the nested featured-media video dict when present, otherwise
        # read the fields from the top-level payload itself.
        video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
        video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
        title = video.get('title') or video['linkText']

        formats = []
        m3u8_url = video.get('m3u8')
        if m3u8_url:
            # The query string is dropped before requesting the playlist.
            playlist_url = m3u8_url.split('?')[0]
            formats = self._extract_m3u8_formats(
                playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)

        mp4_url = video.get('mp4')
        if mp4_url:
            # Direct MP4 link; bitrate and dimensions are hard-coded values.
            formats.append({
                'abr': 128,
                'format_id': 'https',
                'height': 360,
                'url': mp4_url,
                'width': 640,
            })

        # Description falls back to the nested meta description when neither
        # 'description' nor 'caption' yields a value.
        meta_description = try_get(video, lambda x: x['meta']['description'])
        description = dict_get(video, ('description', 'caption'), meta_description)
        thumbnail = dict_get(video.get('image') or {}, ('source', 'dynamicSource'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video.get('date')),
            'duration': int_or_none(video.get('length')),
            'formats': formats,
        }
93
94
class ABCOTVSClipsIE(InfoExtractor):
    # Extractor for clips.abcotvs.com video pages, identified by numeric id.
    IE_NAME = 'abcotvs:clips'
    _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://clips.abcotvs.com/kabc/video/214814',
        'info_dict': {
            'id': '214814',
            'ext': 'mp4',
            'title': 'SpaceX launch pad explosion destroys rocket, satellite',
            'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
            'upload_date': '20160901',
            'timestamp': 1472756695,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The lookup endpoint answers with a 'results' list; only the first
        # entry is used.
        api_url = f'https://clips.abcotvs.com/vogo/video/getByIds?ids={video_id}'
        response = self._download_json(api_url, video_id)
        clip = response['results'][0]

        title = clip['title']
        # The query string is dropped before requesting the playlist.
        stream_url = clip['videoURL'].split('?')[0]
        formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')

        return {
            'id': video_id,
            'title': title,
            'description': clip.get('description'),
            'thumbnail': clip.get('thumbnailURL'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': int_or_none(clip.get('pubDate')),
            'formats': formats,
        }