]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cspan.py
[vimeo] Fix non-ASCII album passwords
[yt-dlp.git] / youtube_dl / extractor / cspan.py
CommitLineData
ca9e7922
PH
1from __future__ import unicode_literals
2
aa0c8739
JMF
3import re
4
5from .common import InfoExtractor
6from ..utils import (
aea6e7fc 7 int_or_none,
ca9e7922 8 unescapeHTML,
009a3408 9 find_xpath_attr,
2fe1b5bd 10 smuggle_url,
672f1bd8 11 determine_ext,
aa0c8739 12)
2fe1b5bd 13from .senateisvp import SenateISVPIE
aa0c8739 14
ca9e7922 15
class CSpanIE(InfoExtractor):
    """Extractor for program pages on c-span.org.

    A page resolves to a single video, to a playlist with one entry per
    file of the program, or is handed over to the SenateISVP extractor when
    the page embeds a Senate ISVP iframe.
    """

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        # For whatever reason, the served video alternates between
        # two different ones
        'info_dict': {
            'id': '340723',
            'ext': 'mp4',
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }, {
        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
        'md5': '446562a736c6bf97118e389433ed88d4',
        'info_dict': {
            'id': '342759',
            'ext': 'mp4',
            'title': 'General Motors Ignition Switch Recall',
            'duration': 14848,
            'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
        },
    }, {
        # Video from senate.gov
        'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'flv',
            'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
        }
    }]

    def _real_extract(self, url):
        """Extract the video(s) behind a c-span.org program page.

        Returns an info dict for a single-file program, a playlist dict
        when the program has several files, or a url_result pointing at
        the SenateISVP extractor when the page embeds a Senate ISVP iframe.
        """
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)
        # The id in the page URL is not the program id; the latter is
        # scraped from the page and used for every subsequent request.
        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        description = self._html_search_regex(
            [
                # The full description
                r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
                # If the description is small enough the other div is not
                # present, otherwise this is a stripped version
                r'<p class=\'initial\'>(.*?)</p>'
            ],
            webpage, 'description', flags=re.DOTALL, default=None)

        doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

        title = find_xpath_attr(doc, './/string', 'name', 'title').text
        # The poster is optional metadata: do not abort the extraction when
        # the XML lacks it. NOTE(review): assumes find_xpath_attr returns
        # None when nothing matches (it lives in ..utils) - confirm.
        poster = find_xpath_attr(doc, './/string', 'name', 'poster')
        thumbnail = poster.text if poster is not None else None

        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
        if senate_isvp_url:
            # The title is smuggled along with the URL under 'force_title'.
            surl = smuggle_url(senate_isvp_url, {'force_title': title})
            return self.url_result(surl, 'SenateISVP', video_id, title)

        # Only fetched once we know the page is not delegated above, so the
        # Senate branch neither pays for nor depends on this request.
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)

        files = data['video']['files']
        try:
            capfile = data['video']['capfile']['#text']
        except (KeyError, TypeError):
            # Captions are optional; a missing key or a non-mapping value
            # (e.g. null) simply means there are no subtitles.
            capfile = None

        entries = [{
            'id': '%s_%d' % (video_id, partnum + 1),
            'title': (
                title if len(files) == 1 else
                '%s part %d' % (title, partnum + 1)),
            'url': unescapeHTML(f['path']['#text']),
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(f.get('length', {}).get('#text')),
            'subtitles': {
                'en': [{
                    'url': capfile,
                    'ext': determine_ext(capfile, 'dfxp')
                }],
            } if capfile else None,
        } for partnum, f in enumerate(files)]

        if len(entries) == 1:
            # A single-file program is returned as a plain video carrying
            # the program id instead of the '<id>_1' part id.
            entry = dict(entries[0])
            entry['id'] = video_id
            return entry

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
            'id': video_id,
        }