]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cbslocal.py
[sendtonews] Fix extraction
[yt-dlp.git] / youtube_dl / extractor / cbslocal.py
CommitLineData
661d46b2
YCH
1# coding: utf-8
2from __future__ import unicode_literals
3
661d46b2
YCH
4from .anvato import AnvatoIE
5from .sendtonews import SendtoNewsIE
6from ..compat import compat_urlparse
962250f7 7from ..utils import unified_timestamp
661d46b2
YCH
8
9
class CBSLocalIE(AnvatoIE):
    """Extractor for dated article pages hosted on ``*.cbslocal.com``.

    A page is served either through an embedded SendtoNews player or through
    the Anvato helpers inherited from ``AnvatoIE``; both cases are handled in
    ``_real_extract``.
    """

    # Matches <station>.cbslocal.com/<digits>/<digits>/<digits>/<slug>; the
    # slug is captured as the ``id`` group.
    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [
        {
            # Anvato backend
            'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
            'md5': 'f0ee3081e3843f575fccef901199b212',
            'info_dict': {
                'id': '3401037',
                'ext': 'mp4',
                'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
                'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
                'thumbnail': 're:^https?://.*',
                'timestamp': 1463440500,
                'upload_date': '20160516',
                'subtitles': {
                    'en': 'mincount:5',
                },
                'categories': [
                    'Stations\\Spoken Word\\KCBSTV',
                    'Syndication\\MSN',
                    'Syndication\\NDN',
                    'Syndication\\AOL',
                    'Syndication\\Yahoo',
                    'Syndication\\Tribune',
                    'Syndication\\Curb.tv',
                    'Content\\News',
                ],
            },
        },
        {
            # SendtoNews embed
            'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
            'info_dict': {
                'id': 'GxfCe0Zo7D-175909-5588',
                'ext': 'mp4',
                'title': 'Recap: CLE 15, CIN 6',
                'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
                'upload_date': '20160516',
                'timestamp': 1463433840,
                'duration': 49,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the result dict for the article page at ``url``.

        The page is downloaded once. If ``SendtoNewsIE._extract_url`` finds
        an embed in it, a ``url_transparent`` result pointing at that embed
        (resolved against ``url``) is produced; otherwise the result comes
        from ``self._extract_anvato_videos``. In both cases ``display_id``
        and a ``timestamp`` derived from the page's ``entry-date`` element
        are then written into the result, replacing any existing values for
        those keys.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        embed_url = SendtoNewsIE._extract_url(webpage)
        if not embed_url:
            result = self._extract_anvato_videos(webpage, display_id)
        else:
            result = {
                '_type': 'url_transparent',
                'url': compat_urlparse.urljoin(url, embed_url),
            }

        # Non-fatal lookup: a page without an entry-date element does not
        # abort extraction.
        released = self._html_search_regex(
            r'class="entry-date">([^<]+)<', webpage, 'released date',
            fatal=False)

        page_fields = {
            'display_id': display_id,
            'timestamp': unified_timestamp(released),
        }
        result.update(page_fields)
        return result