]>
Commit | Line | Data |
---|---|---|
8b0d7a66 KM |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8b0d7a66 | 8 | ExtractorError, |
577281b0 KM |
9 | float_or_none, |
10 | int_or_none, | |
11 | parse_duration, | |
8b0d7a66 KM |
12 | ) |
13 | ||
14 | ||
class CDAIE(InfoExtractor):
    """Information extractor for videos hosted on cda.pl.

    Handles regular ``cda.pl/video/<id>`` pages as well as
    ``ebd.cda.pl/<width>x<height>/<id>`` embed URLs (see ``_VALID_URL``).
    """

    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://www.cda.pl/'
    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
        'info_dict': {
            'id': '5749950c',
            'ext': 'mp4',
            'height': 720,
            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
            'description': 'md5:269ccd135d550da90d1662651fcb9772',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'average_rating': float,
            'duration': 39
        }
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
        'info_dict': {
            'id': '57413289',
            'ext': 'mp4',
            'title': 'Lądowanie na lotnisku na Maderze',
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
            'view_count': int,
            'average_rating': float,
            'duration': 137
        }
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Downloads the main video page, scrapes the metadata from it and
        collects one format from that page plus one from every additional
        quality page linked from it.

        Raises ExtractorError (marked as expected) when the page contains
        the premium-only notice.
        """
        video_id = self._match_id(url)
        # Presumably makes the site serve its HTML5 player markup, which is
        # what the ``player_data`` scraping below relies on.
        self._set_cookie('cda.pl', 'cda.player', 'html5')

        # _BASE_URL ends with a slash; strip it so that joining with an
        # absolute path does not produce 'http://www.cda.pl//video/...'.
        base_url = self._BASE_URL.rstrip('/')

        webpage = self._download_webpage(
            base_url + '/video/' + video_id, video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            raise ExtractorError('This video is only available for premium users.', expected=True)

        formats = []

        uploader = self._search_regex(r'''(?x)
            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
        # The label may be separated from the number by plain whitespace,
        # a non-breaking space character or a literal '&nbsp;' entity.
        view_count = self._search_regex(
            r'Odsłony:(?:\s|&nbsp;|\xa0)*([0-9]+)', webpage,
            'view_count', default=None)
        average_rating = self._search_regex(
            r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
            webpage, 'rating', fatal=False, group='rating_value')

        info_dict = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'view_count': int_or_none(view_count),
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            # Filled in by extract_format() from the first page that has it.
            'duration': None,
        }

        def extract_format(page, version):
            """Append the format described by ``page`` to ``info_dict``.

            ``version`` is only used to label log and warning messages.
            Returns silently when no usable player data is found.
            """
            json_str = self._search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                '%s player_json' % version, fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
                json_str, '%s player_data' % version, fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            if not video or 'file' not in video:
                self.report_warning('Unable to extract %s version information' % version)
                return
            f = {
                'url': video['file'],
            }
            # The currently selected quality button carries the format id
            # and the height of the version served on this page.
            m = re.search(
                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
                page)
            if m:
                f.update({
                    'format_id': m.group('format_id'),
                    'height': int(m.group('height')),
                })
            info_dict['formats'].append(f)
            if not info_dict['duration']:
                info_dict['duration'] = parse_duration(video.get('duration'))

        extract_format(webpage, 'default')

        # Every non-active quality button links to a page serving another
        # version of the same video.
        for href, resolution in re.findall(
                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                webpage):
            # Use a separate name so the main page is not shadowed while its
            # quality links are being processed.
            version_page = self._download_webpage(
                base_url + '/' + href.lstrip('/'), video_id,
                'Downloading %s version information' % resolution, fatal=False)
            if not version_page:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
                self.report_warning('Unable to download %s version information' % resolution)
                continue
            extract_format(version_page, resolution)

        self._sort_formats(formats)

        return info_dict