]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tfo.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / tfo.py
1 import json
2
3 from .common import InfoExtractor
4 from ..networking import HEADRequest
5 from ..utils import ExtractorError, clean_html, int_or_none
6
7
class TFOIE(InfoExtractor):
    """Extractor for video pages on tfo.org.

    The page's numeric product id is sent to the site's ``get_infos`` API and
    the result is returned as a ``url_transparent`` entry pointing at the
    ``LimelightMedia`` extractor.
    """

    _GEO_COUNTRIES = ['CA']
    _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
        'md5': 'cafbe4f47a8dae0ca0159937878100d6',
        'info_dict': {
            'id': '7da3d50e495c406b8fc0b997659cc075',
            'ext': 'mp4',
            'title': 'Video Game Hackathon',
            'description': 'md5:558afeba217c6c8d96c60e5421795c07',
        },
    }

    def _real_extract(self, url):
        """Resolve a tfo.org video URL into a Limelight media reference.

        Raises:
            ExtractorError: if no ``tfo-session`` cookie is available, or if
                the API reports a failure (``success == 0``). A failure with
                code ``ErrGeoBlocked`` is reported through
                ``raise_geo_restricted`` instead.
        """
        video_id = self._match_id(url)

        # The response itself is discarded; the request is made before the
        # 'tfo-session' cookie is read back for the same host.
        # NOTE(review): presumably this request is what makes the site set
        # that cookie - confirm against the live site.
        self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)

        # Look the cookie up defensively: indexing directly would surface a
        # bare KeyError if the site stopped issuing the session cookie.
        session_cookie = self._get_cookies('http://www.tfo.org/').get('tfo-session')
        if session_cookie is None:
            raise ExtractorError('Unable to obtain the tfo-session cookie')

        infos = self._download_json(
            'http://www.tfo.org/api/web/video/get_infos', video_id,
            data=json.dumps({
                'product_id': video_id,
            }).encode(),
            headers={
                'X-tfo-session': session_cookie.value,
            })

        if infos.get('success') == 0:
            if infos.get('code') == 'ErrGeoBlocked':
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
            # A missing 'msg' must not mask the API failure with a KeyError.
            message = clean_html(infos.get('msg')) or 'unknown error'
            raise ExtractorError(f'{self.IE_NAME} said: {message}', expected=True)

        video_data = infos['data']

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + video_data['llid'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'series': video_data.get('collection'),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'duration': int_or_none(video_data.get('duration')),
            'ie_key': 'LimelightMedia',
        }