2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..compat
import compat_str
15 class GediBaseIE(InfoExtractor
):
17 def _clean_audio_fmts(formats
):
21 unique_formats
.append(f
)
22 formats
[:] = unique_formats
24 def _real_extract(self
, url
):
25 video_id
= self
._match
_id
(url
)
27 webpage
= self
._download
_webpage
(url
, video_id
)
28 player_data
= re
.findall(
29 r
'PlayerFactory\.setParam\(\'(?P
<type>.+?
)\',\s
*\'(?P
<name
>.+?
)\',\s
*\'(?P
<val
>.+?
)\'\
);',
39 fmt_reg = r'(?P
<t
>video|audio
)-(?P
<p
>rrtv|hls
)-(?P
<h
>[\w\d
]+)(?
:-(?P
<br
>[\w\d
]+))?$
'
40 br_reg = r'video
-rrtv
-(?P
<br
>\d
+)-'
42 for t, n, v in player_data:
44 m = re.match(fmt_reg, n)
47 if m.group('t
') == 'audio
':
48 if m.group('p
') == 'hls
':
49 audio_fmts.extend(self._extract_m3u8_formats(
50 v, video_id, 'm4a
', m3u8_id='hls
', fatal=False))
51 elif m.group('p
') == 'rrtv
':
62 elif m.group('t
') == 'video
':
64 if m.group('p
') == 'hls
':
65 hls_fmts.extend(self._extract_m3u8_formats(
66 v, video_id, 'mp4
', m3u8_id='hls
', fatal=False))
68 elif m.group('p
') == 'rrtv
':
70 mm = re.search(br_reg, v)
72 'format_id
': 'https
-' + m.group('h
'),
75 'tbr
': int(m.group('br
')) if m.group('br
') else
76 (int(mm.group('br
')) if mm.group('br
') else 0),
77 'height
': int(m.group('h
'))
83 if n == 'image_full_play
':
86 title = self._og_search_title(webpage) if title == '' else title
89 title = compat_str(title).encode('utf8
', 'replace
').replace(b'\xc3\x82', b'').decode('utf8
', 'replace
')
92 self._clean_audio_fmts(audio_fmts)
93 self._sort_formats(audio_fmts)
95 self._sort_formats(hls_fmts)
97 self._sort_formats(http_fmts)
99 formats.extend(audio_fmts)
100 formats.extend(hls_fmts)
101 formats.extend(http_fmts)
106 'description
': self._html_search_meta('twitter
:description
', webpage),
112 class GediIE(GediBaseIE):
113 _VALID_URL = r'''(?x)https?://video\.
115 (?:espresso\.)?repubblica
132 (?:\.gelocal)?\.it/(?!embed/).+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
134 'url
': 'https
://video
.lastampa
.it
/politica
/il
-paradosso
-delle
-regionali
-la
-lega
-vince
-ma
-sembra
-aver
-perso
/121559/121683',
135 'md5
': '84658d7fb9e55a6e57ecc77b73137494
',
137 'id': '121559/121683',
139 'title
': 'Il paradosso delle Regionali
: ecco perché la Lega vince ma sembra aver perso
',
140 'description
': 'md5
:de7f4d6eaaaf36c153b599b10f8ce7ca
',
141 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
144 'url
': 'https
://video
.repubblica
.it
/motori
/record
-della
-pista
-a
-spa
-francorchamps
-la
-pagani
-huayra
-roadster
-bc
-stupisce
/367415/367963',
145 'md5
': 'e763b94b7920799a0e0e23ffefa2d157
',
147 'id': '367415/367963',
149 'title
': 'Record della pista a Spa Francorchamps
, la Pagani Huayra Roadster BC stupisce
',
150 'description
': 'md5
:5deb503cefe734a3eb3f07ed74303920
',
151 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
154 'url
': 'https
://video
.ilsecoloxix
.it
/sport
/cassani
-e
-i
-brividi
-azzurri
-ai
-mondiali
-di
-imola
-qui
-mi
-sono
-innamorato
-del-ciclismo
-da
-ragazzino
-incredibile
-tornarci
-da
-ct
/66184/66267',
155 'md5
': 'e48108e97b1af137d22a8469f2019057
',
159 'title
': 'Cassani e i brividi azzurri ai Mondiali di Imola
: \\"Qui mi sono innamorato del ciclismo da ragazzino, incredibile tornarci da ct\\"',
160 'description
': 'md5
:fc9c50894f70a2469bb9b54d3d0a3d3b
',
161 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
164 'url
': 'https
://video
.iltirreno
.gelocal
.it
/sport
/dentro
-la
-notizia
-ferrari
-cosa
-succede
-a
-maranello
/141059/142723',
165 'md5
': 'a6e39f3bdc1842bbd92abbbbef230817
',
167 'id': '141059/142723',
169 'title
': 'Dentro la notizia
- Ferrari
, cosa succede a Maranello
',
170 'description
': 'md5
:9907d65b53765681fa3a0b3122617c1f
',
171 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
176 class GediEmbedsIE(GediBaseIE):
177 _VALID_URL = r'''(?x)https?://video\.
179 (?:espresso\.)?repubblica
196 (?:\.gelocal)?\.it/embed/.+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
198 'url
': 'https
://video
.huffingtonpost
.it
/embed
/politica
/cotticelli
-non
-so
-cosa
-mi
-sia
-successo
-sto
-cercando
-di
-capire
-se
-ho
-avuto
-un
-malore
/29312/29276?responsive
=true
&el
=video971040871621586700
',
199 'md5
': 'f4ac23cadfea7fef89bea536583fa7ed
',
203 'title
': 'Cotticelli
: \\"Non so cosa mi sia successo. Sto cercando di capire se ho avuto un malore\\"',
204 'description
': 'md5
:d41d8cd98f00b204e9800998ecf8427e
',
205 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
208 'url
': 'https
://video
.espresso
.repubblica
.it
/embed
/tutti
-i
-video
/01-ted
-villa
/14772/14870&width
=640&height
=360',
209 'md5
': '0391c2c83c6506581003aaf0255889c0
',
213 'title
': 'Festival EMERGENCY
, Villa
: «La buona informazione aiuta la salute»
(14772-14870)',
214 'description
': 'md5
:2bce954d278248f3c950be355b7c2226
',
215 'thumbnail
': r're
:^https
://www\
.repstatic\
.it
/video
/photo
/.+?
-thumb
-social
-play\
.jpg$
',
def _sanitize_urls(urls):
    """Normalize a list of embed URLs in place and return that same list.

    Two passes are made over ``urls``:

    1. entries starting with ``//`` (protocol-relative) are prefixed
       with ``https:``;
    2. every entry is rebuilt as ``urljoin(base_url(e), url_basename(e))``.

    :param urls: mutable list of URL strings; it is modified in place.
    :return: the same list object. ``_extract_urls`` hands this return
        value straight to its caller, so returning nothing here would make
        every lookup come back as ``None``.
    """
    # add protocol if missing
    for i, e in enumerate(urls):
        if e.startswith('//'):
            urls[i] = 'https:%s' % e
    # Rebuild each URL from its base and basename.
    # NOTE(review): presumably this strips query strings / fragments from
    # iframe URLs -- confirm against the url helper implementations.
    for i, e in enumerate(urls):
        urls[i] = urljoin(base_url(e), url_basename(e))
    return urls
231 def _extract_urls(webpage):
234 for mobj in re.finditer(r'''(?x)
240 (?P
<url
>https?
://video\
.
242 (?
:espresso\
.)?repubblica
259 (?
:\
.gelocal
)?\
.it
/embed
/.+?
)
261 return GediEmbedsIE._sanitize_urls(entries)
def _extract_url(webpage):
    """Return the first embed URL found in ``webpage``, or ``None``.

    Delegates the search to ``GediEmbedsIE._extract_urls`` and keeps only
    the first hit; a falsy result (empty or ``None``) yields ``None``.
    """
    candidates = GediEmbedsIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]