jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/turbo.py
[ie/twitch] Fix m3u8 extraction (#8960)
[yt-dlp.git] / yt_dlp / extractor / turbo.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 qualities,
9 xpath_text,
10 )
11
12
class TurboIE(InfoExtractor):
    """Extractor for video pages under ``turbo.fr/videos-voiture/``.

    The numeric id embedded in the page URL is used to query an XML
    playlist endpoint (``_API_URL``); metadata and stream URLs are read
    from the first ``./channel/item`` element of that document, while the
    description comes from the HTML page's ``description`` meta tag.
    """

    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
    # XML playlist endpoint; the placeholder is filled with the video id.
    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
    _TEST = {
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
        'info_dict': {
            'id': '454443',
            'ext': 'mp4',
            'duration': 3715,
            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single turbo.fr video page.

        Downloads the HTML page (for the description) and the XML playlist
        (for title, duration, thumbnail and stream URLs).

        Raises:
            ExtractorError: if the playlist has no ``./channel/item`` element.
        """
        video_id = self._match_valid_url(url).group('id')

        webpage = self._download_webpage(url, video_id)

        playlist = self._download_xml(self._API_URL.format(video_id), video_id)
        item = playlist.find('./channel/item')
        if item is None:
            raise ExtractorError('Playlist item was not found', expected=True)

        title = xpath_text(item, './title', 'title')
        # The duration lives in an element named 'durate' in this feed.
        duration = int_or_none(xpath_text(item, './durate', 'duration'))
        thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
        description = self._html_search_meta('description', webpage)

        formats = []
        # Quality ids are ranked by their position in this list
        # (presumably worst to best -- see utils.qualities).
        get_quality = qualities(['3g', 'sd', 'hq'])
        for child in item:
            # Stream URLs are stored in children tagged url_video_<quality>.
            m = re.search(r'url_video_(?P<quality>.+)', child.tag)
            if not m:
                continue
            # An empty <url_video_*> element carries no usable stream; do not
            # emit a format without a URL.
            video_url = (child.text or '').strip()
            if not video_url:
                continue
            quality = compat_str(m.group('quality'))
            formats.append({
                'format_id': quality,
                'url': video_url,
                'quality': get_quality(quality),
            })

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
        }