]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/cbslocal.py
Update to ytdl-2021.01.03
[yt-dlp.git] / youtube_dlc / extractor / cbslocal.py
CommitLineData
661d46b2
YCH
1# coding: utf-8
2from __future__ import unicode_literals
3
661d46b2
YCH
4from .anvato import AnvatoIE
5from .sendtonews import SendtoNewsIE
6from ..compat import compat_urlparse
44444f0d
YCH
7from ..utils import (
8 parse_iso8601,
9 unified_timestamp,
10)
661d46b2
YCH
11
12
class CBSLocalIE(AnvatoIE):
    # Handles video pages addressed by a numeric id on any lowercase-letter
    # subdomain of cbslocal.com. The host part of the pattern lives in its
    # own attribute so that other extractors in this module can build on it.
    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
        'info_dict': {
            'id': '3580809',
            'ext': 'mp4',
            'title': 'A Very Blue Anniversary',
            'description': 'CBS2’s Cindy Hsu has more.',
            'thumbnail': 're:^https?://.*',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'uploader': 'CBS',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\WCBSTV',
                'Syndication\\AOL',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\Yahoo',
                'Content\\News',
                'Content\\News\\Local News',
            ],
            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Turn a cbslocal.com video URL into an ``anvato:`` URL result.

        The numeric id captured by ``_VALID_URL`` is appended to a fixed
        ``anvato:<access key>:`` prefix and the result is passed to
        ``url_result`` with the ``'Anvato'`` extractor key.
        """
        video_id = self._match_id(url)
        anvato_url = 'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + video_id
        return self.url_result(anvato_url, 'Anvato', video_id)
51
52
class CBSLocalArticleIE(AnvatoIE):
    # Handles dated article pages (three numeric path segments followed by a
    # slug) on the same hosts that CBSLocalIE matches.
    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [{
        # Anvato backend
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
            'uploader': 'CBS',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\KCBSTV',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\AOL',
                'Syndication\\Yahoo',
                'Syndication\\Tribune',
                'Syndication\\Curb.tv',
                'Content\\News',
            ],
            'tags': ['CBS 2 News Evening'],
        },
    }, {
        # SendtoNews embed
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 9,
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Extract the video embedded in a cbslocal.com article page.

        If the page carries a SendtoNews embed, the (possibly relative)
        embed URL is resolved against the article URL and delegated to
        SendtoNewsIE. Otherwise the Anvato videos found in the page are
        returned, annotated with the article slug as ``display_id`` and a
        ``timestamp`` taken from the page's entry/post date element, or from
        the ``uploadDate`` meta tag if that yields nothing.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        embed_url = SendtoNewsIE._extract_url(page)
        if embed_url:
            absolute_url = compat_urlparse.urljoin(url, embed_url)
            return self.url_result(absolute_url, ie=SendtoNewsIE.ie_key())

        result = self._extract_anvato_videos(page, slug)

        # Prefer the human-readable date shown on the page; only consult the
        # meta tag when that date is missing or cannot be parsed.
        date_text = self._html_search_regex(
            r'class="(?:entry|post)-date"[^>]*>([^<]+)', page,
            'released date', default=None)
        released = unified_timestamp(date_text)
        if not released:
            released = parse_iso8601(
                self._html_search_meta('uploadDate', page))

        result.update({
            'display_id': slug,
            'timestamp': released,
        })
        return result