]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/caltrans.py
[Caltrans] Add extractor (#2781)
[yt-dlp.git] / yt_dlp / extractor / caltrans.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class CaltransIE(InfoExtractor):
    """Extractor for live-stream pages at ``*.ca.gov/vm/loc/<segment>/<id>.htm``.

    The page embeds a ``<script>`` block that assigns the stream URL, poster
    image and descriptive labels to JavaScript variables; those assignments
    are scraped with regular expressions.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?ca\.gov/vm/loc/[^/]+/(?P<id>[a-z0-9_]+)\.htm'
    _TEST = {
        'url': 'https://cwwp2.dot.ca.gov/vm/loc/d3/hwy50at24th.htm',
        'info_dict': {
            'id': 'hwy50at24th',
            'ext': 'ts',
            'title': 'US-50 : Sacramento : Hwy 50 at 24th',
            'live_status': 'is_live',
            'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the live stream embedded in the page at *url*.

        Raises an extraction error (via ``_search_regex``) when the page has
        no script block mentioning an ``.m3u8`` URL or when the
        ``videoStreamURL`` assignment is missing, because no formats can be
        produced without it.  The title parts and the thumbnail are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The inline script that mentions the .m3u8 URL also holds the other
        # variable assignments needed below.
        global_vars = self._search_regex(
            r'<script[^<]+?([^<]+\.m3u8[^<]+)</script>',
            webpage, 'Global Vars')

        # Mandatory: without the stream URL there is nothing to download, so
        # fail here with a clear message instead of handing None to the
        # m3u8 parser.
        video_stream = self._search_regex(
            r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL')

        # Optional metadata: a missing value only produces a warning.
        route_place = self._search_regex(
            r'routePlace\s*=\s*"([^"]+)"', global_vars, 'Route Place', fatal=False)
        location_name = self._search_regex(
            r'locationName\s*=\s*"([^"]+)"', global_vars, 'Location Name', fatal=False)
        poster_url = self._search_regex(
            r'posterURL\s*=\s*"([^"]+)"', global_vars, 'Poster Url', fatal=False)

        formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True)
        self._sort_formats(formats)

        # Join only the parts that were actually found so a missing value
        # never shows up as the literal text "None"; fall back to the id.
        title = ' : '.join(filter(None, (route_place, location_name))) or video_id

        return {
            'id': video_id,
            'title': title,
            'is_live': True,
            'formats': formats,
            'thumbnail': poster_url,
        }