]>
Commit | Line | Data |
---|---|---|
575dad3c RLN |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
575dad3c | 5 | |
575dad3c | 6 | from .common import InfoExtractor |
7c39a655 S |
7 | from ..utils import ( |
8 | int_or_none, | |
9 | float_or_none, | |
10 | unified_strdate, | |
11 | ) | |
12 | ||
575dad3c RLN |
13 | |
class PornoVoisinesIE(InfoExtractor):
    """Extractor for video pages on pornovoisines.com.

    Media formats, duration and subtitle tracks are read from the site's
    JSON settings API; the remaining metadata (title, description,
    thumbnail, upload date, view count, rating, categories) is scraped
    from the HTML page.
    """

    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'

    _TEST = {
        'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
        'md5': '6f8aca6a058592ab49fe701c8ba8317b',
        'info_dict': {
            'id': '919',
            'display_id': 'recherche-appartement',
            'ext': 'mp4',
            'title': 'Recherche appartement',
            'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140925',
            'duration': 120,
            'view_count': int,
            'average_rating': float,
            'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
            'age_limit': 18,
            'subtitles': {
                'fr': [{
                    'ext': 'vtt',
                }]
            },
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the settings-URL JSON, the settings JSON it points to,
        and the HTML page itself, then returns a dict with the keys
        ``id``, ``display_id``, ``formats``, ``title``, ``description``,
        ``thumbnail``, ``upload_date``, ``duration``, ``view_count``,
        ``average_rating``, ``categories``, ``age_limit`` and
        ``subtitles``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        # Two-step API: the first call only tells us where the real
        # settings document lives.
        settings_url = self._download_json(
            'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
            video_id, note='Getting settings URL')['video_settings_url']
        settings = self._download_json(settings_url, video_id)['data']

        # ``or {}`` (rather than a .get() default) also covers an explicit
        # JSON null, which would otherwise raise AttributeError below.
        main_settings = settings.get('main') or {}

        formats = []
        for kind, data in settings['variants'].items():
            if kind == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
            elif kind == 'MP4':
                for item in data:
                    item_url = item.get('url')
                    if not item_url:
                        # A variant without a URL cannot be downloaded;
                        # skip it instead of aborting the whole extraction.
                        continue
                    formats.append({
                        'url': item_url,
                        'height': int_or_none(item.get('height')),
                        # NOTE(review): key kept as in the original code;
                        # confirm whether the framework expects a different
                        # field name/unit for bitrate.
                        'bitrate': item.get('bitrate'),
                    })
        self._sort_formats(formats)

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        # The webpage has a bug - there's no space between "thumb" and src=
        # The URL body stops at whichever quote character opened the
        # attribute (group 2), so single-quoted values are handled too.
        thumbnail = self._html_search_regex(
            r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>(?:(?!\2).)+)\2',
            webpage, 'thumbnail', fatal=False, group='url')

        upload_date = unified_strdate(self._search_regex(
            r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
        duration = main_settings.get('duration')
        view_count = int_or_none(self._search_regex(
            r'(\d+) vues', webpage, 'view count', fatal=False))
        average_rating = self._search_regex(
            r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
        if average_rating:
            # The page uses a decimal comma; convert it before parsing.
            average_rating = float_or_none(average_rating.replace(',', '.'))

        categories = self._html_search_regex(
            r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
        if categories:
            categories = [category.strip() for category in categories.split(',')]

        # Only advertise French subtitles when at least one track URL is
        # present; an empty 'fr' list would claim subtitles that do not exist.
        vtt_tracks = main_settings.get('vtt_tracks') or {}
        subtitle_tracks = [{
            'url': track_url,
        } for track_url in vtt_tracks.values() if track_url]
        subtitles = {'fr': subtitle_tracks} if subtitle_tracks else {}

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'categories': categories,
            'age_limit': 18,
            'subtitles': subtitles,
        }