]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cbslocal.py
[fox9] Add extractor (closes #11110)
[yt-dlp.git] / youtube_dl / extractor / cbslocal.py
CommitLineData
661d46b2
YCH
1# coding: utf-8
2from __future__ import unicode_literals
3
661d46b2
YCH
4from .anvato import AnvatoIE
5from .sendtonews import SendtoNewsIE
6from ..compat import compat_urlparse
962250f7 7from ..utils import unified_timestamp
661d46b2
YCH
8
9
class CBSLocalIE(AnvatoIE):
    """Extractor for dated article pages on ``*.cbslocal.com``.

    A page either carries a SendtoNews embed, which is handed over to
    ``SendtoNewsIE``, or is processed through the Anvato helpers inherited
    from ``AnvatoIE``.
    """

    # <station>.cbslocal.com/<digits>/<digits>/<digits>/<slug>; the slug is
    # captured as the ``id`` group.
    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [{
        # Anvato backend
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\KCBSTV',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\AOL',
                'Syndication\\Yahoo',
                'Syndication\\Tribune',
                'Syndication\\Curb.tv',
                'Content\\News'
            ],
        },
    }, {
        # SendtoNews embed
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 9,
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Extract the media embedded in the article at ``url``.

        Returns the ``url_result`` for the SendtoNews embed when
        ``SendtoNewsIE._extract_url`` finds one in the page; otherwise
        returns the dict produced by ``_extract_anvato_videos`` with
        ``display_id`` and ``timestamp`` added.
        """
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # A SendtoNews embed takes precedence over the Anvato path.
        embed_url = SendtoNewsIE._extract_url(page)
        if embed_url:
            # Resolve the embed URL against the article URL before delegating.
            absolute_embed_url = compat_urlparse.urljoin(url, embed_url)
            return self.url_result(
                absolute_embed_url, ie=SendtoNewsIE.ie_key())

        result = self._extract_anvato_videos(page, display_id)

        # The date lookup is non-fatal, so a page without an "entry-date"
        # element does not abort the extraction.
        entry_date = self._html_search_regex(
            r'class="entry-date">([^<]+)<', page, 'released date',
            fatal=False)
        parsed_timestamp = unified_timestamp(entry_date)

        result['display_id'] = display_id
        result['timestamp'] = parsed_timestamp
        return result