]>
Commit | Line | Data |
---|---|---|
29f7c58a | 1 | # coding: utf-8 |
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    extract_attributes,
    int_or_none,
    parse_iso8601,
    try_get,
)
13 | ||
14 | ||
class ArcPublishingIE(InfoExtractor):
    """Extractor for internal ``arcpublishing:<org>:<uuid>`` URLs.

    ``_extract_urls`` builds such URLs from "powa" player ``<div>`` elements
    found in a web page; ``_real_extract`` resolves one of them through the
    organisation's ``findByUuid`` video API endpoint.
    """

    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
    _TESTS = [{
        # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
        'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
        'only_matching': True,
    }, {
        # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
        'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
        'only_matching': True,
    }, {
        # https://www.actionnewsjax.com/video/live-stream/
        'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
        'only_matching': True,
    }, {
        # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
        'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
        'only_matching': True,
    }, {
        # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
        'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
        'only_matching': True,
    }, {
        # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
        'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
        'only_matching': True,
    }, {
        # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
        'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
        'only_matching': True,
    }, {
        # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
        'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
        'only_matching': True,
    }, {
        # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
        'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
        'only_matching': True,
    }, {
        # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
        'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
        'only_matching': True,
    }, {
        # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
        'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
        'only_matching': True,
    }, {
        # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
        'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
        'only_matching': True,
    }]
    # (organisations, API host template) pairs; '%s' is filled with the org
    # name. Organisations not listed here use the fallback template set in
    # _real_extract.
    _POWA_DEFAULTS = [
        (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
        ([
            'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
            'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
            'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
        ], 'video-api-cdn.%s.arcpublishing.com/api'),
    ]

    @staticmethod
    def _extract_urls(webpage):
        """Return ``arcpublishing:<org>:<uuid>`` URLs for powa players in *webpage*.

        Only ``<div>`` tags whose ``class`` contains ``powa`` and that carry a
        ``data-uuid`` matching ``_UUID_REGEX`` are considered; a tag without a
        ``data-org`` attribute is skipped.
        """
        entries = []
        # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
        for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
            powa = extract_attributes(powa_el) or {}
            org = powa.get('data-org')
            uuid = powa.get('data-uuid')
            if org and uuid:
                entries.append('arcpublishing:%s:%s' % (org, uuid))
        return entries

    def _real_extract(self, url):
        """Query the org's ``findByUuid`` endpoint and build the info dict.

        Raises ExtractorError when the API response contains no video entry.
        """
        org, uuid = self._match_valid_url(url).groups()
        for orgs, tmpl in self._POWA_DEFAULTS:
            if org in orgs:
                base_api_tmpl = tmpl
                break
        else:
            # No explicit mapping for this organisation
            base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
        # 'wapo' selects its template above but is substituted into the host
        # name as 'washpost'
        if org == 'wapo':
            org = 'washpost'
        results = self._download_json(
            'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
            uuid, query={'uuid': uuid})
        # The first element of the response is the video; fail with a clear
        # message instead of an IndexError/TypeError when it is missing
        video = try_get(results, lambda x: x[0], dict)
        if not video:
            raise ExtractorError(
                'No video found for UUID %s' % uuid, expected=True)
        title = video['headlines']['basic']
        is_live = video.get('status') == 'live'

        seen_urls = set()
        formats = []
        # 'streams' may be absent or null
        for s in video.get('streams') or []:
            s_url = s.get('url')
            # Skip streams without a URL and duplicates of an earlier one
            if not s_url or s_url in seen_urls:
                continue
            seen_urls.add(s_url)
            stream_type = s.get('stream_type')
            if stream_type == 'smil':
                smil_formats = self._extract_smil_formats(
                    s_url, uuid, fatal=False)
                for f in smil_formats:
                    if f['url'].endswith('/cfx/st'):
                        f['app'] = 'cfx/st'
                        play_path = f.get('play_path')
                        if play_path and not play_path.startswith('mp4:'):
                            f['play_path'] = 'mp4:' + play_path
                        tbr = f.get('tbr')
                        if isinstance(tbr, float):
                            # Move the scaled bitrate from 'tbr' to 'vbr' and
                            # derive the format id from it
                            f['vbr'] = tbr * 1000
                            del f['tbr']
                            f['format_id'] = 'rtmp-%d' % f['vbr']
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
                # Drop playlists in which every format has acodec == 'none';
                # an empty result is skipped here as well
                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                    continue
                for f in m3u8_formats:
                    height = f.get('height')
                    if not height:
                        continue
                    # Take the number following the height in the URL as vbr
                    vbr = self._search_regex(
                        r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
                    if vbr:
                        f['vbr'] = int(vbr)
                formats.extend(m3u8_formats)
            else:
                vbr = int_or_none(s.get('bitrate'))
                formats.append({
                    'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
                    'vbr': vbr,
                    'width': int_or_none(s.get('width')),
                    'height': int_or_none(s.get('height')),
                    'filesize': int_or_none(s.get('filesize')),
                    'url': s_url,
                    'quality': -10,
                })
        self._sort_formats(formats)

        subtitles = {}
        for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
            subtitle_url = subtitle.get('url')
            if subtitle_url:
                # All subtitle tracks are filed under 'en'
                subtitles.setdefault('en', []).append({'url': subtitle_url})

        return {
            'id': uuid,
            'title': title,
            'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
            'description': try_get(video, lambda x: x['subheadlines']['basic']),
            'formats': formats,
            # NOTE(review): divisor of 100 kept from the original; confirm the
            # units of the API's 'duration' field
            'duration': int_or_none(video.get('duration'), 100),
            'timestamp': parse_iso8601(video.get('created_date')),
            'subtitles': subtitles,
            'is_live': is_live,
        }