]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/movingimage.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / movingimage.py
1 from .common import InfoExtractor
2 from ..utils import (
3 parse_duration,
4 unescapeHTML,
5 )
6
7
class MovingImageIE(InfoExtractor):
    """Extractor for film pages whose URL matches ``movingimage.nls.uk/film/<id>``."""

    # The numeric path component after /film/ is captured as the video id.
    _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
    _TEST = {
        'url': 'http://movingimage.nls.uk/film/3561',
        'md5': '4caa05c2b38453e6f862197571a7be2f',
        'info_dict': {
            'id': '3561',
            'ext': 'mp4',
            'title': 'SHETLAND WOOL',
            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
            'duration': 900,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Scrape the film page at *url* and return its info dict.

        The page is downloaded once; the HLS manifest URL, the metadata
        fields and the thumbnail are all pulled out of that same HTML with
        regular expressions.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``duration`` and ``thumbnail``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The manifest URL is the double-quoted value of a `file: "..."` entry
        # in the page source.
        formats = self._extract_m3u8_formats(
            self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
            video_id, ext='mp4', entry_protocol='m3u8_native')

        def search_field(field_name, fatal=False):
            """Return the text of the ``field_content`` span labelled *field_name*.

            The lower-cased field name is passed as the search label so that
            any diagnostic names the field that was actually looked up,
            rather than always saying 'title'.
            """
            return self._search_regex(
                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                webpage, field_name.lower(), fatal=fatal)

        # Only the title is searched with fatal=True; the remaining fields use
        # the non-fatal default.
        # Any leading/trailing '(', ')', '[' or ']' characters are stripped
        # from the title.
        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
        # NOTE(review): presumably unescapeHTML and parse_duration accept the
        # value a non-fatal search yields when the field is absent -- confirm.
        description = unescapeHTML(search_field('Description'))
        duration = parse_duration(search_field('Running time'))
        # The thumbnail is the single-quoted value of an `image: '...'` entry.
        thumbnail = self._search_regex(
            r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }