]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cspan.py
[orf:iptv] Update test
[yt-dlp.git] / youtube_dl / extractor / cspan.py
CommitLineData
ca9e7922
PH
1from __future__ import unicode_literals
2
aa0c8739
JMF
3import re
4
5from .common import InfoExtractor
6from ..utils import (
aea6e7fc 7 int_or_none,
ca9e7922 8 unescapeHTML,
009a3408 9 find_xpath_attr,
2fe1b5bd 10 smuggle_url,
aa0c8739 11)
2fe1b5bd 12from .senateisvp import SenateISVPIE
aa0c8739 13
ca9e7922 14
class CSpanIE(InfoExtractor):
    """Extractor for C-SPAN video pages (``c-span.org/video/?<id>``).

    The numeric program id is scraped from the page, then the title and
    poster are read from the ``flashXml.php`` service and the media files
    from the ``ajax-player.php`` service.  Pages that embed a Senate ISVP
    iframe are delegated to the ``SenateISVP`` extractor instead.
    """

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        # For whatever reason, the served video alternates between
        # two different ones
        'info_dict': {
            'id': '340723',
            'ext': 'mp4',
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }, {
        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
        'md5': '446562a736c6bf97118e389433ed88d4',
        'info_dict': {
            'id': '342759',
            'ext': 'mp4',
            'title': 'General Motors Ignition Switch Recall',
            'duration': 14848,
            'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
        },
    }, {
        # Video from senate.gov
        'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'flv',
            'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
        }
    }]

    def _real_extract(self, url):
        """Extract the video(s) behind a C-SPAN page URL.

        Returns one of:
        * a ``url_result`` pointing at the Senate ISVP extractor when the
          page embeds a Senate ISVP iframe;
        * a single info dict when the program consists of one file;
        * a playlist dict (one entry per file, ids suffixed ``_<part>``)
          when the program is split into several files.
        """
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)
        # The id in the URL is only a page id; the real program id is
        # embedded in the page markup.
        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        description = self._html_search_regex(
            [
                # The full description
                r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
                # If the description is small enough the other div is not
                # present, otherwise this is a stripped version
                r'<p class=\'initial\'>(.*?)</p>'
            ],
            webpage, 'description', flags=re.DOTALL, default=None)

        doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

        title = find_xpath_attr(doc, './/string', 'name', 'title').text
        # The poster is optional metadata: do not abort the extraction when
        # the XML lacks it.  Compare against None explicitly, since an
        # ElementTree element without children evaluates as false.
        # (Assumes find_xpath_attr yields None when nothing matches.)
        poster = find_xpath_attr(doc, './/string', 'name', 'poster')
        thumbnail = poster.text if poster is not None else None

        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
        if senate_isvp_url:
            surl = smuggle_url(senate_isvp_url, {'force_title': title})
            return self.url_result(surl, 'SenateISVP', video_id, title)

        # Only fetched once we know the video is hosted by C-SPAN itself;
        # the Senate ISVP path above never uses this data.
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)

        files = data['video']['files']

        entries = [{
            'id': '%s_%d' % (video_id, partnum + 1),
            'title': (
                title if len(files) == 1 else
                '%s part %d' % (title, partnum + 1)),
            'url': unescapeHTML(f['path']['#text']),
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(f.get('length', {}).get('#text')),
        } for partnum, f in enumerate(files)]

        if len(entries) == 1:
            # A single-file program is returned as a plain video whose id is
            # the bare program id (no "_1" suffix).
            entry = dict(entries[0])
            entry['id'] = video_id
            return entry
        else:
            return {
                '_type': 'playlist',
                'entries': entries,
                'title': title,
                'id': video_id,
            }