7 from .keezmovies
import KeezMoviesIE
10 class Tube8IE(KeezMoviesIE
): # XXX: Do not subclass from concrete IE
11 _VALID_URL
= r
'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
12 _EMBED_REGEX
= [r
'<iframe[^>]+\bsrc=["\'](?P
<url
>(?
:https?
:)?
//(?
:www\
.)?tube8\
.com
/embed
/(?
:[^
/]+/)+\d
+)']
14 'url
': 'http
://www
.tube8
.com
/teen
/kasia
-music
-video
/229795/',
15 'md5
': '65e20c48e6abff62ed0c3965fff13a39
',
18 'display_id
': 'kasia
-music
-video
',
20 'description
': 'hot teen Kasia grinding
',
21 'uploader
': 'unknown
',
22 'title
': 'Kasia music video
',
25 'categories
': ['Teen
'],
29 'url
': 'http
://www
.tube8
.com
/shemale
/teen
/blonde
-cd
-gets
-kidnapped
-by
-two
-blacks
-and-punished
-for-being
-a
-slutty
-girl
/19569151/',
30 'only_matching
': True,
33 def _real_extract(self, url):
34 webpage, info = self._extract_info(url)
37 info['title
'] = self._html_search_regex(
38 r'videoTitle\s
*=\s
*"([^"]+)', webpage, 'title
')
40 description = self._html_search_regex(
41 r'(?s
)Description
:</dt
>\s
*<dd
>(.+?
)</dd
>', webpage, 'description
', fatal=False)
42 uploader = self._html_search_regex(
43 r'<span
class="username">\s
*(.+?
)\s
*<',
44 webpage, 'uploader
', fatal=False)
46 like_count = int_or_none(self._search_regex(
47 r'rupVar\s
*=\s
*"(\d+)"', webpage, 'like count
', fatal=False))
48 dislike_count = int_or_none(self._search_regex(
49 r'rdownVar\s
*=\s
*"(\d+)"', webpage, 'dislike count
', fatal=False))
50 view_count = str_to_int(self._search_regex(
51 r'Views
:\s
*</dt
>\s
*<dd
>([\d
,\
.]+)',
52 webpage, 'view count
', fatal=False))
53 comment_count = str_to_int(self._search_regex(
54 r'<span
id="allCommentsCount">(\d
+)</span
>',
55 webpage, 'comment count
', fatal=False))
57 category = self._search_regex(
58 r'Category
:\s
*</dt
>\s
*<dd
>\s
*<a
[^
>]+href
=[^
>]+>([^
<]+)',
59 webpage, 'category
', fatal=False)
60 categories = [category] if category else None
62 tags_str = self._search_regex(
63 r'(?s
)Tags
:\s
*</dt
>\s
*<dd
>(.+?
)</(?
!a
)',
64 webpage, 'tags
', fatal=False)
65 tags = [t for t in re.findall(
66 r'<a
[^
>]+href
=[^
>]+>([^
<]+)', tags_str)] if tags_str else None
69 'description
': description,
71 'view_count
': view_count,
72 'like_count
': like_count,
73 'dislike_count
': dislike_count,
74 'comment_count
': comment_count,
75 'categories
': categories,