]>
Commit | Line | Data |
---|---|---|
7beb36a5 PH |
1 | import re |
2 | import socket | |
3 | import xml.etree.ElementTree | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
8 | compat_str, | |
9 | compat_urllib_error, | |
10 | compat_urllib_parse_urlparse, | |
11 | compat_urllib_request, | |
12 | ||
13 | ExtractorError, | |
14 | ) | |
15 | ||
16 | ||
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com video pages.

    Extraction takes two HTTP requests: the per-video "moogaloop" metadata
    XML (title, description, thumbnail and the manifest location), then the
    f4m manifest that the metadata points at.
    """

    # NOTE(review): presumably flags this extractor as currently broken;
    # the flag is consumed outside this class -- confirm against InfoExtractor.
    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

    def report_manifest(self, video_id):
        """Report that the XML manifest is being downloaded."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _real_extract(self, url):
        """Extract the video information for a collegehumor.com video URL.

        Returns a single-element list holding the info dictionary
        ('id', 'uploader', 'upload_date', 'description', 'title',
        'thumbnail', 'url', 'ext').

        Raises ExtractorError when the URL does not match _VALID_URL, when
        either XML document cannot be downloaded, or when either document
        lacks the expected nodes/attributes.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        network_errors = (
            compat_urllib_error.URLError,
            compat_http_client.HTTPException,
            socket.error,
        )

        # Step 1: metadata XML describing the video.
        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except network_errors as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')
        if not manifest_url:
            # An empty <file/> element has text None; without this check the
            # string concatenation below would fail with a bare TypeError.
            raise ExtractorError(u'Invalid metadata XML file')

        # Step 2: f4m manifest referenced by the metadata.
        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except network_errors as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            manifest_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except (IndexError, KeyError):
            # KeyError: the <media> node exists but carries no 'url' attribute.
            raise ExtractorError(u'Invalid manifest file')
        if manifest_id is None:
            # Empty <id/> element; slicing None below would raise TypeError.
            raise ExtractorError(u'Invalid manifest file')

        # Build the media URL on the manifest's host: '/z', the manifest id
        # minus its last two characters, the media node's url attribute and
        # the 'Seg1-Frag1' suffix.
        url_pr = compat_urllib_parse_urlparse(manifest_url)
        info['url'] = (url_pr.scheme + '://' + url_pr.netloc + '/z' +
                       manifest_id[:-2] + '/' + node_id + 'Seg1-Frag1')
        info['ext'] = 'f4f'
        return [info]