]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/reuters.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / reuters.py
CommitLineData
0c50eeb9 1import re
2
3from .common import InfoExtractor
4from ..utils import (
0c50eeb9 5 int_or_none,
e897bd82 6 js_to_json,
0c50eeb9 7 unescapeHTML,
8)
9
10
class ReutersIE(InfoExtractor):
    """Extract a video from a reuters.com URL carrying a ``videoId`` query parameter."""

    # Extractor is flagged as not working; kept as in the original.
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Downloads the player iframe page, reads the arguments passed to
        ``Reuters.yovideo.drawPlayer(...)``, then queries the yospace ``mas``
        endpoint for the list of available formats.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration``
        and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        # The player arguments are a JS object literal; normalise it to JSON
        # text so values can be matched as double-quoted "key": "value" pairs.
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))

        def get_json_value(key, fatal=False):
            """Return the string value stored under *key* in the player data."""
            return self._search_regex(
                r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)

        title = unescapeHTML(get_json_value('title', fatal=True))
        # Use _search_regex rather than a bare re.search(...).groups() so that
        # an unexpected "flv" value is reported through the extractor's error
        # handling instead of crashing with AttributeError on a None match.
        mmid, fid = self._search_regex(
            r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True),
            'media ids', group=(1, 2))

        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                # Entries without a URL cannot be downloaded; skip them.
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # "mobile" entries are labelled 3gp; everything else uses the
                # container name reported by the API.
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })

        return {
            'id': video_id,
            'title': title,
            # Thumbnail and duration are optional (non-fatal lookups).
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }