]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cspan.py
[youtube] Modernize
[yt-dlp.git] / youtube_dl / extractor / cspan.py
CommitLineData
ca9e7922
PH
1from __future__ import unicode_literals
2
aa0c8739
JMF
3import re
4
5from .common import InfoExtractor
6from ..utils import (
aea6e7fc 7 int_or_none,
ca9e7922 8 unescapeHTML,
009a3408 9 find_xpath_attr,
aa0c8739
JMF
10)
11
ca9e7922 12
class CSpanIE(InfoExtractor):
    """Extractor for C-SPAN program pages (``c-span.org/video/?<id>``).

    The page is scraped for the numeric program id, which is then used to
    query a JSON player endpoint (file list) and an XML endpoint (title and
    poster).  The result is always a playlist with one entry per file.
    """

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        # For whatever reason, the served video alternates between
        # two different ones
        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
        'info_dict': {
            'id': '340723',
            'ext': 'mp4',
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }, {
        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
        'info_dict': {
            'id': '342759',
            'title': 'General Motors Ignition Switch Recall',
        },
        'playlist_duration_sum': 14855,
    }]

    def _real_extract(self, url):
        """Build a playlist result for the C-SPAN page at *url*.

        Returns a ``playlist`` info dict whose ``id`` is the program id
        scraped from the page and whose entries are the individual files
        listed by the player endpoint.  ``description`` and ``thumbnail``
        are optional and are ``None`` when the page / XML lack them.
        """
        # The id in the URL is only the page id; the program id used by the
        # data endpoints has to be scraped from the page itself.
        page_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, page_id)
        video_id = self._search_regex(
            r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        # Description is optional metadata: fall back to None instead of
        # aborting the extraction when neither pattern matches.
        description = self._html_search_regex(
            [
                # The full description
                r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
                # If the description is small enough the other div is not
                # present, otherwise this is a stripped version
                r'<p class=\'initial\'>(.*?)</p>'
            ],
            webpage, 'description', flags=re.DOTALL, default=None)

        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)

        doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

        title = find_xpath_attr(doc, './/string', 'name', 'title').text

        # The poster is optional.  Compare against None explicitly: an
        # ElementTree element without children is falsy, so a plain
        # truthiness test would wrongly discard a valid element.
        poster = find_xpath_attr(doc, './/string', 'name', 'poster')
        thumbnail = poster.text if poster is not None else None

        files = data['video']['files']
        # Only multi-file programs get a "part N" suffix in entry titles.
        single_file = len(files) == 1

        entries = []
        for index, f in enumerate(files):
            part_number = index + 1
            entries.append({
                'id': '%s_%d' % (video_id, part_number),
                'title': (
                    title if single_file else
                    '%s part %d' % (title, part_number)),
                'url': unescapeHTML(f['path']['#text']),
                'description': description,
                'thumbnail': thumbnail,
                'duration': int_or_none(f.get('length', {}).get('#text')),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
            'id': video_id,
        }