2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
# Common base class for the Glomex extractors defined below it.
# _DEFAULT_ORIGIN_URL is the fallback origin used by _unsmuggle_origin_url and
# by _download_api_data when no origin/current URL is supplied.
# NOTE(review): _API_URL is presumably the endpoint queried by
# _download_api_data; the line that passes it is not visible here -- confirm.
20 class GlomexBaseIE(InfoExtractor
):
21 _DEFAULT_ORIGIN_URL
= 'https://player.glomex.com/'
22 _API_URL
= 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/'
25 def _smuggle_origin_url(url
, origin_url
):
26 if origin_url
is None:
28 return smuggle_url(url
, {'origin': origin_url}
)
def _unsmuggle_origin_url(cls, url, fallback_origin_url=None):
    """Split a possibly smuggled URL into ``(plain_url, origin)``.

    :param url: URL that may carry smuggled data.
    :param fallback_origin_url: origin to report when none was smuggled; when
        falsy, ``cls._DEFAULT_ORIGIN_URL`` is used instead.
    :return: tuple of the unsmuggled URL and the 'origin' entry of the data
        returned by ``unsmuggle_url``.
    """
    origin_default = fallback_origin_url or cls._DEFAULT_ORIGIN_URL
    plain_url, smuggled = unsmuggle_url(url, default={'origin': origin_default})
    return plain_url, smuggled['origin']
# Return a human-readable label for a Glomex id. The label is looked up by the
# text before the first '-' in video_id; a prefix that is not in the table
# yields 'unknown type'.
# NOTE(review): the opening of the _VIDEOID_TYPES table, and any entries that
# precede 'rl', are not visible in this view.
36 def _get_videoid_type(self
, video_id
):
40 'rl': 'related videos playlist',
41 'cl': 'curated playlist',
# Only the part of the id up to the first dash selects the label.
43 prefix
= video_id
.split('-')[0]
44 return _VIDEOID_TYPES
.get(prefix
, 'unknown type')
# Download the API JSON for video_id through self._download_json and return it.
# The visible entries are 'integration_id' (integration), 'playlist_id'
# (video_id) and 'current_url' (current_url, or _DEFAULT_ORIGIN_URL when
# current_url is falsy).
# The progress and error notes name the id type reported by _get_videoid_type.
# NOTE(review): the container these entries belong to, the request URL
# argument and the end of the _download_json call are not visible in this
# view; presumably the entries are sent as the query -- confirm.
46 def _download_api_data(self
, video_id
, integration
, current_url
=None):
48 'integration_id': integration
,
49 'playlist_id': video_id
,
50 'current_url': current_url
or self
._DEFAULT
_ORIGIN
_URL
,
# The id type is only used to make the download/error messages descriptive.
52 video_id_type
= self
._get
_videoid
_type
(video_id
)
53 return self
._download
_json
(
55 video_id
, 'Downloading %s JSON' % video_id_type
,
56 'Unable to download %s JSON' % video_id_type
,
def _download_and_extract_api_data(self, video_id, integration, current_url):
    """Download the API data for ``video_id`` and convert it to a result.

    :param video_id: Glomex id whose data is requested.
    :param integration: integration id forwarded to ``_download_api_data``.
    :param current_url: origin/current URL forwarded to ``_download_api_data``.
    :return: the single extracted entry when the API lists exactly one video,
        otherwise ``self.playlist_result(entries, video_id)``.
    :raises ExtractorError: when the API response lists no videos.
    """
    api_data = self._download_api_data(video_id, integration, current_url)
    videos = api_data['videos']
    # Fail only when there is nothing to extract. An unconditional raise here
    # would make the extraction below unreachable.
    if not videos:
        raise ExtractorError('no videos found for %s' % video_id)
    entries = [self._extract_api_data(video, video_id) for video in videos]
    if len(entries) == 1:
        return entries[0]
    return self.playlist_result(entries, video_id)
# Turn one entry of the API's video list into result fields.
# Visible steps: reject geo-blocked content, collect formats and subtitles
# from video['source'], tag formats with the video language, sort the formats,
# build thumbnails from the image entries, then assemble the metadata fields.
# NOTE(review): several lines of this method are not visible in this view:
# the branch on `ext`, the non-HLS format entry that 'format_id' belongs to,
# the loop that defines `fmt`, the start of the `thumbnails` list and the
# start of the final result mapping.
67 def _extract_api_data(self
, video
, video_id
):
# Geo-blocked entries are reported with the countries listed in the payload.
68 if video
.get('error_code') == 'contentGeoblocked':
69 self
.raise_geo_restricted(countries
=video
['geo_locations'])
# Accumulators for every source URL of this video.
71 formats
, subs
= [], {}
72 for format_id
, format_url
in video
['source'].items():
73 ext
= determine_ext(format_url
)
75 formats_
, subs_
= self
._extract
_m
3u8_formats
_and
_subtitles
(
76 format_url
, video_id
, 'mp4', m3u8_id
=format_id
,
78 formats
.extend(formats_
)
79 self
._merge
_subtitles
(subs_
, target
=subs
)
83 'format_id': format_id
,
# The language is copied onto formats only when the payload provides one.
85 if video
.get('language'):
87 fmt
['language'] = video
['language']
88 self
._sort
_formats
(formats
)
# Candidate images: the 'images' list (if any) plus the single 'image' entry;
# entries without a 'url' are dropped by the filter below.
90 images
= (video
.get('images') or []) + [video
.get('image') or {}]
92 'id': image
.get('id'),
93 'url': f
'{image["url"]}/profile:player-960x540',
96 } for image
in images
if image
.get('url')]
# NOTE(review): the format de-duplication helper is applied to the thumbnail
# list here; presumably it drops repeated entries -- confirm.
97 self
._remove
_duplicate
_formats
(thumbnails
)
# Metadata fields; 'id' falls back to the requested video_id when the payload
# has no 'clip_id'.
100 'id': video
.get('clip_id') or video_id
,
101 'title': video
.get('title'),
102 'description': video
.get('description'),
103 'thumbnails': thumbnails
,
104 'duration': int_or_none(video
.get('clip_duration')),
105 'timestamp': video
.get('created_at'),
# Extractor for pages on video.glomex.com. _VALID_URL captures an id of the
# form 'v-<chars without a dash>' from the second path segment.
# _INTEGRATION_ID is the integration id this class passes to
# GlomexEmbedIE.build_player_url in _real_extract.
# NOTE(review): the 'url'/'md5'/'id'/... entries below look like fragments of
# the extractor's test fixture; the enclosing assignment is not visible in
# this view.
111 class GlomexIE(GlomexBaseIE
):
113 IE_DESC
= 'Glomex videos'
114 _VALID_URL
= r
'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)'
115 _INTEGRATION_ID
= '19syy24xjn1oqlpc'
118 'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
119 'md5': 'cec33a943c4240c9cb33abea8c26242e',
121 'id': 'v-cb24uwg77hgh',
123 'title': 'md5:38a90cedcfadd72982c81acf13556e0c',
124 'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
126 'timestamp': 1619895017,
127 'upload_date': '20210501',
def _real_extract(self, url):
    """Delegate extraction of a matched page to ``GlomexEmbedIE``.

    The id matched from ``url`` and this class's ``_INTEGRATION_ID`` are
    turned into a player URL, with ``url`` itself passed along as the origin,
    and the player URL is returned as a ``url_result`` for the embed
    extractor.
    """
    video_id = self._match_id(url)
    player_url = GlomexEmbedIE.build_player_url(video_id, self._INTEGRATION_ID, url)
    return self.url_result(player_url, GlomexEmbedIE.ie_key(), video_id)
# Extractor for Glomex iframe-player URLs.
# _BASE_PLAYER_URL is the scheme-less player address.
# _BASE_PLAYER_URL_RE is its regex-escaped form with the '/1/' path segment
# replaced by a one-character wildcard segment.
# _VALID_URL requires a 'playlistId' query parameter, captured as the id.
# NOTE(review): the 'url'/'md5'/'id'/... entries below look like fragments of
# the extractor's test fixtures (three cases); the enclosing assignment is not
# visible in this view.
138 class GlomexEmbedIE(GlomexBaseIE
):
139 IE_NAME
= 'glomex:embed'
140 IE_DESC
= 'Glomex embedded videos'
141 _BASE_PLAYER_URL
= '//player.glomex.com/integration/1/iframe-player.html'
142 _BASE_PLAYER_URL_RE
= re
.escape(_BASE_PLAYER_URL
).replace('/1/', r
'/[^/]/')
143 _VALID_URL
= rf
'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?playlistId=(?P<id>[^#&]+)'
146 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
147 'md5': '68f259b98cc01918ac34180142fce287',
149 'id': 'v-cfa6lye0dkdd-sf',
151 'timestamp': 1635337199,
153 'upload_date': '20211027',
154 'description': 'md5:e741185fc309310ff5d0c789b437be66',
155 'title': 'md5:35647293513a6c92363817a0fb0a7961',
158 'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
160 'id': 'rl-vcb49w1fb592p',
162 'playlist_count': 100,
164 'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
166 'id': 'cl-bgqaata6aw8x',
168 'playlist_mincount': 2,
def build_player_url(cls, video_id, integration, origin_url=None):
    """Build an iframe-player URL for a playlist/video and integration id.

    :param video_id: value for the ``playlistId`` query parameter.
    :param integration: value for the ``integrationId`` query parameter.
    :param origin_url: optional origin handed to ``cls._smuggle_origin_url``
        together with the built URL.
    :return: whatever ``cls._smuggle_origin_url`` returns for the
        ``https:``-prefixed player URL.
    """
    # Key order is kept so the generated query string stays the same.
    params = {
        'playlistId': video_id,
        'integrationId': integration,
    }
    player_url = f'https:{cls._BASE_PLAYER_URL}?{urllib.parse.urlencode(params)}'
    return cls._smuggle_origin_url(player_url, origin_url)
# Generator yielding player URLs discovered in `webpage`, each carrying
# `origin_url` via _smuggle_origin_url / build_player_url.
# Three kinds of embed are visible:
#   1. <iframe> elements whose src points at the player URL; the src is
#      HTML-unescaped and yielded only if cls.suitable() accepts it.
#   2. <glomex-player> elements and <div data-glomex-player="true"> elements
#      that carry both data-integration-id and data-playlist-id attributes.
#   3. inline <script> elements searched for integrationId / playlistId.
# NOTE(review): in case 3, integration_id and playlist_id are the results of
# re.search (match objects) and are passed as-is to build_player_url; the
# captured text (group 'id') looks like what was intended -- confirm.
# NOTE(review): the definitions of `quot_re` and of the first and third
# `regex`, plus the guards between the visible lines, are not visible in this
# view.
180 def _extract_urls(cls
, webpage
, origin_url
):
181 # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
185 <iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
186 (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
188 for mobj in re.finditer(regex, webpage):
189 url = unescapeHTML(mobj.group('url
'))
190 if cls.suitable(url):
191 yield cls._smuggle_origin_url(url, origin_url)
194 <glomex-player [^>]+?>|
195 <div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
# Element-based embeds: both ids must be present as data-* attributes.
196 for mobj in re.finditer(regex, webpage):
197 attrs = extract_attributes(mobj.group(0))
198 if attrs.get('data
-integration
-id') and attrs.get('data
-playlist
-id'):
199 yield cls.build_player_url(attrs['data
-playlist
-id'], attrs['data
-integration
-id'], origin_url)
201 # naive parsing of inline scripts for hard-coded integration parameters
203 (?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
204 (?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
# The template above is filled with the parameter name via `regex % name`.
205 for mobj in re.finditer(r'(?x
)<script
[^
<]*>.+?
</script
>', webpage):
206 script = mobj.group(0)
207 integration_id = re.search(regex % 'integrationId
', script)
208 if not integration_id:
210 playlist_id = re.search(regex % 'playlistId
', script)
212 yield cls.build_player_url(playlist_id, integration_id, origin_url)
def _real_extract(self, url):
    """Extract the video or playlist referenced by an iframe-player URL.

    :param url: player URL, optionally carrying a smuggled origin.
    :return: the result of ``_download_and_extract_api_data`` for the
        ``playlistId`` and ``integrationId`` found in the URL.
    :raises ExtractorError: (expected) when the URL has no ``integrationId``
        query parameter.
    """
    url, origin_url = self._unsmuggle_origin_url(url)
    playlist_id = self._match_id(url)
    integration = parse_qs(url).get('integrationId', [None])[0]
    # Raise only when the parameter is missing or empty. An unconditional
    # raise here would make every extraction fail.
    if not integration:
        raise ExtractorError('No integrationId in URL', expected=True)
    return self._download_and_extract_api_data(playlist_id, integration, origin_url)