]>
Commit | Line | Data |
---|---|---|
29f7c58a | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | extract_attributes, | |
6 | int_or_none, | |
6aaf96a3 | 7 | join_nonempty, |
29f7c58a | 8 | parse_iso8601, |
9 | try_get, | |
10 | ) | |
11 | ||
12 | ||
class ArcPublishingIE(InfoExtractor):
    """Extractor for Arc Publishing (POWA) videos.

    Handles internal ``arcpublishing:<org>:<uuid>`` URLs, which
    ``_extract_embed_urls`` builds from POWA player ``<div>`` elements found
    in a webpage.
    """

    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
    _TESTS = [{
        # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
        'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
        'only_matching': True,
    }, {
        # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
        'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
        'only_matching': True,
    }, {
        # https://www.actionnewsjax.com/video/live-stream/
        'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
        'only_matching': True,
    }, {
        # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
        'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
        'only_matching': True,
    }, {
        # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
        'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
        'only_matching': True,
    }, {
        # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
        'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
        'only_matching': True,
    }, {
        # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
        'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
        'only_matching': True,
    }, {
        # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
        'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
        'only_matching': True,
    }, {
        # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
        'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
        'only_matching': True,
    }, {
        # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
        'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
        'only_matching': True,
    }, {
        # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
        'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
        'only_matching': True,
    }, {
        # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
        'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
        'only_matching': True,
    }]
    # (organisations, API host template) pairs. `%s` is filled with the
    # organisation name; organisations not listed here use the fallback
    # template in `_real_extract`.
    _POWA_DEFAULTS = [
        (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
        ([
            'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
            'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
            'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
        ], 'video-api-cdn.%s.arcpublishing.com/api'),
    ]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return ``arcpublishing:<org>:<uuid>`` URLs for POWA players in *webpage*.

        A player is a ``<div>`` whose ``class`` contains the word ``powa`` and
        which carries a ``data-uuid`` attribute matching ``_UUID_REGEX``.
        Elements without a ``data-org`` attribute are skipped. *url* is unused.
        """
        # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
        powa_div_re = (
            rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{cls._UUID_REGEX}"[^>]*>)')
        entries = []
        for powa_el in re.findall(powa_div_re, webpage):
            powa = extract_attributes(powa_el) or {}
            org = powa.get('data-org')
            uuid = powa.get('data-uuid')
            if org and uuid:
                entries.append(f'arcpublishing:{org}:{uuid}')
        return entries

    def _real_extract(self, url):
        """Fetch the video record for *url* and build the info dict.

        The record is the first element of the JSON list returned by the
        ``findByUuid`` endpoint. A missing ``headlines.basic`` title raises,
        as does an empty response list; all other fields are optional.
        """
        org, uuid = self._match_valid_url(url).groups()

        # Pick the API host template registered for this organisation.
        base_api_tmpl = next(
            (tmpl for orgs, tmpl in self._POWA_DEFAULTS if org in orgs),
            '%s-prod-cdn.video-api.arcpublishing.com/api')
        # The template lookup uses 'wapo', but the host name uses 'washpost'.
        if org == 'wapo':
            org = 'washpost'

        video = self._download_json(
            f'https://{base_api_tmpl % org}/v1/ansvideos/findByUuid',
            uuid, query={'uuid': uuid})[0]
        title = video['headlines']['basic']
        is_live = video.get('status') == 'live'

        seen_urls = set()
        formats = []
        # `or []` also covers an explicit null `streams` value.
        for s in video.get('streams') or []:
            s_url = s.get('url')
            if not s_url or s_url in seen_urls:
                continue
            seen_urls.add(s_url)
            stream_type = s.get('stream_type')
            if stream_type == 'smil':
                smil_formats = self._extract_smil_formats(
                    s_url, uuid, fatal=False)
                for f in smil_formats:
                    # Only formats served from a '/cfx/st' endpoint are
                    # adjusted; the RTMP-specific keys are read only there.
                    if f['url'].endswith('/cfx/st'):
                        f['app'] = 'cfx/st'
                        if not f['play_path'].startswith('mp4:'):
                            f['play_path'] = 'mp4:' + f['play_path']
                        if isinstance(f['tbr'], float):
                            # Move the rescaled bitrate from tbr to vbr and
                            # derive the format id from it.
                            f['vbr'] = f.pop('tbr') * 1000
                            f['format_id'] = 'rtmp-%d' % f['vbr']
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
                # Skip playlists with no audio at all (this is also true for
                # an empty result, e.g. after a failed download).
                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                    continue
                for f in m3u8_formats:
                    height = f.get('height')
                    if not height:
                        continue
                    # The video bitrate is looked up in the URL right after
                    # the height, e.g. `_<height>_<vbr>` or `x<height>-<vbr>`.
                    vbr = self._search_regex(
                        r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
                    if vbr:
                        f['vbr'] = int(vbr)
                formats.extend(m3u8_formats)
            else:
                # Any other stream type is exposed as a single direct format
                # with an explicitly low `quality` value.
                vbr = int_or_none(s.get('bitrate'))
                formats.append({
                    'format_id': join_nonempty(stream_type, vbr),
                    'vbr': vbr,
                    'width': int_or_none(s.get('width')),
                    'height': int_or_none(s.get('height')),
                    'filesize': int_or_none(s.get('filesize')),
                    'url': s_url,
                    'quality': -10,
                })

        # All subtitle URLs are filed under 'en'; the record's language, if
        # any, is not consulted.
        subtitles = {}
        for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
            subtitle_url = subtitle.get('url')
            if subtitle_url:
                subtitles.setdefault('en', []).append({'url': subtitle_url})

        return {
            'id': uuid,
            'title': title,
            'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
            'description': try_get(video, lambda x: x['subheadlines']['basic']),
            'formats': formats,
            # Arc's ANS video schema documents `duration` in milliseconds,
            # so scale by 1000 (previously 100) to obtain seconds.
            'duration': int_or_none(video.get('duration'), 1000),
            'timestamp': parse_iso8601(video.get('created_date')),
            'subtitles': subtitles,
            'is_live': is_live,
        }