]>
Commit | Line | Data |
---|---|---|
9271bc83 PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import json | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
0e518e2f | 9 | ExtractorError, |
9271bc83 PH |
10 | int_or_none, |
11 | ) | |
12 | ||
13 | ||
class CNETIE(InfoExtractor):
    """Information extractor for video pages under cnet.com/videos/."""

    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a CNET video page.

        The page embeds a JSON blob in the ``data-cnet-video-options``
        attribute of the ``cnetVideoPlayer`` div; all metadata and format
        URLs are read from that blob.

        Raises ExtractorError when the blob carries no video entry or the
        entry has neither a ``headline`` nor a ``title``.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)

        # Prefer the single 'video' entry; fall back to the first element of
        # 'videos'. Both keys are looked up with .get() so that a missing key
        # or an empty list ends in the ExtractorError below rather than a raw
        # KeyError/IndexError.
        vdata = data.get('video')
        if not vdata:
            videos = data.get('videos') or []
            vdata = videos[0] if videos else None
        if not vdata:
            raise ExtractorError('Cannot find video data')

        video_id = vdata['id']

        title = vdata.get('headline')
        if title is None:
            title = vdata.get('title')
        if title is None:
            raise ExtractorError('Cannot find title!')

        description = vdata.get('dek')
        # 'image' may be absent or an explicit JSON null; treat both as empty.
        thumbnail = (vdata.get('image') or {}).get('path')

        # Uploader data is optional metadata: tolerate a partial author
        # record instead of failing the whole extraction.
        author = vdata.get('author')
        if author:
            name_parts = [author.get('firstName'), author.get('lastName')]
            uploader = ' '.join(part for part in name_parts if part) or None
            uploader_id = author.get('email')
        else:
            uploader = None
            uploader_id = None

        formats = []
        for f in vdata['files']['data']:
            # NOTE(review): int_or_none(..., 1000) presumably scales the raw
            # bitrate down by 1000 (bps -> kbps) -- confirm against utils.
            formats.append({
                'format_id': '%s-%s-%s' % (
                    f['type'], f['format'],
                    int_or_none(f.get('bitrate'), 1000, default='')),
                'url': f['uri'],
                'tbr': int_or_none(f.get('bitrate'), 1000),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }