2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
8 from .kaltura
import KalturaIE
19 class AsianCrushBaseIE(InfoExtractor
):
20 _VALID_URL_BASE
= r
'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
22 'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
23 'widescreen_thumbnail_url', 'screencap_widescreen',
25 _API_SUFFIX
= {'retrocrush.tv': '-ott'}
def _call_api(self, host, endpoint, video_id, query, resource):
    """Fetch JSON from an API endpoint of *host* and return its 'objects' entry.

    The request goes to ``https://api<suffix>.<host>/<endpoint>``, where the
    suffix is looked up in ``self._API_SUFFIX`` and falls back to an empty
    string for hosts that have no entry there.

    *query* is forwarded as the request query, *resource* is only used to
    build the progress note, and geo verification headers are attached.
    """
    api_suffix = self._API_SUFFIX.get(host, '')
    api_url = 'https://api%s.%s/%s' % (api_suffix, host, endpoint)
    note = 'Downloading %s JSON metadata' % resource
    response = self._download_json(
        api_url, video_id, note, query=query,
        headers=self.geo_verification_headers())
    # Only the 'objects' member of the response is handed back to callers.
    return response['objects']
def _download_object_data(self, host, object_id, resource):
    """Look up one object by id through the 'search' endpoint.

    Calls ``self._call_api`` with ``{'id': object_id}`` as the query and
    returns the first element of whatever it returns; indexing fails if
    that result is empty.
    """
    search_query = {'id': object_id}
    matches = self._call_api(
        host, 'search', object_id, search_query, resource)
    return matches[0]
def _get_object_description(self, obj):
    """Return the description of *obj*, passed through ``strip_or_none``.

    'long_description' is preferred; 'short_description' is used when the
    long one is missing or falsy.
    """
    description = obj.get('long_description')
    if not description:
        description = obj.get('short_description')
    return strip_or_none(description)
40 def _parse_video_data(self
, video
):
43 entry_id
, partner_id
= [None] * 2
44 for k
in self
._KALTURA
_KEYS
:
47 mobj
= re
.search(r
'/p/(\d+)/.+?/entryId/([^/]+)/', k_url
)
49 partner_id
, entry_id
= mobj
.groups()
52 meta_categories
= try_get(video
, lambda x
: x
['meta']['categories'], list) or []
53 categories
= list(filter(None, [c
.get('name') for c
in meta_categories
]))
55 show_info
= video
.get('show_info') or {}
58 '_type': 'url_transparent',
59 'url': 'kaltura:%s:%s' % (partner_id
, entry_id
),
60 'ie_key': KalturaIE
.ie_key(),
63 'description': self
._get
_object
_description
(video
),
64 'age_limit': parse_age_limit(video
.get('mpaa_rating') or video
.get('tv_rating')),
65 'categories': categories
,
66 'series': show_info
.get('show_name'),
67 'season_number': int_or_none(show_info
.get('season_num')),
68 'season_id': show_info
.get('season_id'),
69 'episode_number': int_or_none(show_info
.get('episode_num')),
73 class AsianCrushIE(AsianCrushBaseIE
):
74 _VALID_URL
= r
'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE
._VALID
_URL
_BASE
76 'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
77 'md5': 'c3b740e48d0ba002a42c0b72857beae6',
81 'title': 'Women Who Flirt',
82 'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
83 'timestamp': 1496936429,
84 'upload_date': '20170608',
85 'uploader_id': 'craig@crifkin.com',
87 'categories': 'count:5',
91 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
92 'only_matching': True,
94 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
95 'only_matching': True,
97 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
98 'only_matching': True,
100 'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
101 'only_matching': True,
103 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
104 'only_matching': True,
106 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
107 'only_matching': True,
109 'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
110 'only_matching': True,
def _real_extract(self, url):
    """Extract a single video from *url*.

    The host and numeric id come from the URL match. For cocoro.tv the
    page is downloaded and an 'entry_id' found in its ``iEmbedVars``
    object, when present, replaces the id taken from the URL. The object
    data for the resulting id is then downloaded and parsed.
    """
    host, video_id = self._match_valid_url(url).groups()

    if host == 'cocoro.tv':
        webpage = self._download_webpage(url, video_id)
        # Fall back to an empty JSON object when the page has no iEmbedVars.
        raw_embed_vars = self._search_regex(
            r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
            default='{}')
        embed_vars = self._parse_json(
            raw_embed_vars, video_id, fatal=False) or {}
        video_id = embed_vars.get('entry_id') or video_id

    video = self._download_object_data(host, video_id, 'video')
    return self._parse_video_data(video)
127 class AsianCrushPlaylistIE(AsianCrushBaseIE):
128 _VALID_URL = r'%s/series
/0+(?P
<id>\d
+)s
\b' % AsianCrushBaseIE._VALID_URL_BASE
130 'url
': 'https
://www
.asiancrush
.com
/series
/006447s
/fruity
-samurai
',
133 'title
': 'Fruity Samurai
',
134 'description
': 'md5
:7535174487e4a202d3872a7fc8f2f154
',
136 'playlist_count
': 13,
138 'url
': 'https
://www
.yuyutv
.com
/series
/013920s
/peep
-show
/',
139 'only_matching
': True,
141 'url
': 'https
://www
.midnightpulp
.com
/series
/016375s
/mononoke
/',
142 'only_matching
': True,
144 'url
': 'https
://www
.cocoro
.tv
/series
/008549s
/the
-wonderful
-wizard
-of
-oz
/',
145 'only_matching
': True,
147 'url
': 'https
://www
.retrocrush
.tv
/series
/012355s
/true
-tears
',
148 'only_matching
': True,
150 _PAGE_SIZE = 1000000000
152 def _fetch_page(self, domain, parent_id, page):
153 videos = self._call_api(
154 domain, 'getreferencedobjects
', parent_id, {
155 'max': self._PAGE_SIZE,
156 'object_type
': 'video
',
157 'parent_id
': parent_id,
158 'start
': page * self._PAGE_SIZE,
159 }, 'page
%d' % (page + 1))
161 yield self._parse_video_data(video)
163 def _real_extract(self, url):
164 host, playlist_id = self._match_valid_url(url).groups()
166 if host == 'cocoro
.tv
':
167 webpage = self._download_webpage(url, playlist_id)
171 for mobj in re.finditer(
172 r'<a
[^
>]+href
=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
174 attrs = extract_attributes(mobj.group(0))
175 if attrs.get('class') == 'clearfix':
176 entries.append(self.url_result(
177 mobj.group('url'), ie=AsianCrushIE.ie_key()))
179 title = self._html_search_regex(
180 r'(?s)<h1\b[^>]\bid=["\']movieTitle
[^
>]+>(.+?
)</h1
>', webpage,
181 'title
', default=None) or self._og_search_title(
182 webpage, default=None) or self._html_search_meta(
183 'twitter
:title
', webpage, 'title
',
184 default=None) or self._search_regex(
185 r'<title
>([^
<]+)</title
>', webpage, 'title
', fatal=False)
187 title = re.sub(r'\s
*\|\s
*.+?$
', '', title)
189 description = self._og_search_description(
190 webpage, default=None) or self._html_search_meta(
191 'twitter
:description
', webpage, 'description
', fatal=False)
193 show = self._download_object_data(host, playlist_id, 'show
')
194 title = show.get('name
')
195 description = self._get_object_description(show)
196 entries = OnDemandPagedList(
197 functools.partial(self._fetch_page, host, playlist_id),
200 return self.playlist_result(entries, playlist_id, title, description)