]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/footyroom.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / footyroom.py
1 from .common import InfoExtractor
2 from .streamable import StreamableIE
3
4
class FootyRoomIE(InfoExtractor):
    """Extractor for footyroom.com match pages.

    A match page is returned as a playlist whose entries are delegated to
    the Playwire and Streamable extractors, depending on which embed each
    media item's payload contains.
    """

    _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
        'info_dict': {
            'id': '79922154',
            'title': 'VIDEO Hull City 0 - 2 Chelsea',
        },
        'playlist_count': 2,
        'add_ie': [StreamableIE.ie_key()],
    }, {
        'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
        'info_dict': {
            'id': '75817984',
            'title': 'VIDEO Georgia 0 - 2 Germany',
        },
        'playlist_count': 1,
        'add_ie': ['Playwire'],
    }]

    def _real_extract(self, url):
        """Build a playlist result from the media list embedded in the page.

        The page assigns a JSON list to ``DataStore.media``; each item's
        ``payload`` is searched for a Playwire ``data-config`` URL and for a
        Streamable embed, and every match becomes one playlist entry.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Decode the JSON value itself rather than cutting the text at the
        # first ';': payloads are markup and may legitimately contain
        # semicolons (e.g. HTML entities or inline styles), which would
        # truncate the captured JSON and make parsing fail.
        playlist = self._search_json(
            r'DataStore\.media\s*=', webpage, 'media data', playlist_id,
            contains_pattern=r'\[(?s:.+)\]')

        playlist_title = self._og_search_title(webpage)

        entries = []
        for video in playlist:
            # Skip malformed items instead of aborting the whole playlist.
            payload = video.get('payload') if isinstance(video, dict) else None
            if not payload or not isinstance(payload, str):
                continue

            playwire_url = self._html_search_regex(
                r'data-config="([^"]+)"', payload,
                'playwire url', default=None)
            if playwire_url:
                # The config URL may be protocol-relative ("//host/..."),
                # so a scheme is supplied before handing it off.
                entries.append(self.url_result(
                    self._proto_relative_url(playwire_url, 'http:'),
                    'Playwire'))

            # Both checks run on every payload, so one item can contribute
            # a Playwire entry and a Streamable entry.
            streamable_url = StreamableIE._extract_url(payload)
            if streamable_url:
                entries.append(self.url_result(
                    streamable_url, StreamableIE.ie_key()))

        return self.playlist_result(entries, playlist_id, playlist_title)