]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cspan.py
[fivemin] Get the 'sid' from the embed page (fixes #2745)
[yt-dlp.git] / youtube_dl / extractor / cspan.py
CommitLineData
ca9e7922
PH
1from __future__ import unicode_literals
2
aa0c8739
JMF
3import re
4
5from .common import InfoExtractor
6from ..utils import (
aea6e7fc 7 int_or_none,
ca9e7922 8 unescapeHTML,
009a3408 9 find_xpath_attr,
aa0c8739
JMF
10)
11
ca9e7922 12
class CSpanIE(InfoExtractor):
    """Information extractor for video pages on c-span.org.

    A video page URL carries a page id; the page itself embeds a numeric
    program id (``progid``).  That program id is used to query two
    services: a JSON player endpoint listing the media files and an XML
    endpoint holding the title and poster image.  The result is returned
    as a playlist with one entry per media file.
    """

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        # For whatever reason, the served video alternates between
        # two different ones
        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
        'info_dict': {
            'id': '340723',
            'ext': 'mp4',
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }]

    def _real_extract(self, url):
        """Build a playlist result for the C-SPAN video page at *url*.

        Returns a dict with ``_type`` set to ``'playlist'``, the program id
        as ``id``, the program title as ``title`` and one entry per file
        reported by the player service.  When there is more than one file,
        entry titles get a ``part N`` suffix.
        """
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)

        # The id in the URL is a page id; the real program id is embedded
        # in the page markup.
        video_id = self._search_regex(
            r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        # The description is optional metadata, so failing to find it must
        # not abort the extraction (fatal=False yields None instead).
        description = self._html_search_regex(
            [
                # The full description
                r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
                # If the description is small enough the other div is not
                # present, otherwise this is a stripped version
                r'<p class=\'initial\'>(.*?)</p>'
            ],
            webpage, 'description', flags=re.DOTALL, fatal=False)

        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)

        doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

        title = find_xpath_attr(doc, './/string', 'name', 'title').text

        # The poster image is optional: guard against a missing node
        # instead of dereferencing ``.text`` on None.  Compare with
        # ``is not None`` because an XML element without children is falsy.
        poster = find_xpath_attr(doc, './/string', 'name', 'poster')
        thumbnail = poster.text if poster is not None else None

        files = data['video']['files']
        multipart = len(files) != 1

        entries = []
        for partnum, f in enumerate(files, 1):
            entries.append({
                'id': '%s_%d' % (video_id, partnum),
                'title': (
                    '%s part %d' % (title, partnum) if multipart
                    else title),
                'url': unescapeHTML(f['path']['#text']),
                'description': description,
                'thumbnail': thumbnail,
                'duration': int_or_none(f.get('length', {}).get('#text')),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
            'id': video_id,
        }