4 from .common
import InfoExtractor
5 from .kaltura
import KalturaIE
class AsianCrushBaseIE(InfoExtractor):
    """Common API and metadata helpers for the AsianCrush family of sites.

    Subclasses build their ``_VALID_URL`` on top of ``_VALID_URL_BASE`` so the
    matched ``host`` group can be passed to the API helpers below.
    """

    # Scheme, optional "www." and one of the supported hosts (captured as ``host``).
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Fields of a video object whose values are scanned for a Kaltura URL.
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Suffix inserted right after "api" in the API hostname; unlisted hosts get ''.
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Request ``endpoint`` from the API of ``host`` and return its ``objects`` value.

        ``video_id`` is passed to ``_download_json`` as the item id, ``query``
        as the query parameters, and ``resource`` is only used in the progress
        note. Geo verification headers are sent with the request.
        """
        api_url = 'https://api%s.%s/%s' % (
            self._API_SUFFIX.get(host, ''), host, endpoint)
        response = self._download_json(
            api_url, video_id,
            'Downloading %s JSON metadata' % resource, query=query,
            headers=self.geo_verification_headers())
        return response['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object the ``search`` endpoint yields for ``object_id``."""
        objects = self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)
        return objects[0]

    def _get_object_description(self, obj):
        """Return the stripped long description of ``obj``, else its short one."""
        return strip_or_none(obj.get('long_description') or obj.get('short_description'))

    def _parse_video_data(self, video):
        """Build a ``url_transparent`` result that hands ``video`` over to KalturaIE.

        The Kaltura partner and entry ids are taken from the first field in
        ``_KALTURA_KEYS`` whose URL contains ``/p/<partner>/.../entryId/<entry>/``.
        Both stay ``None`` when no field matches.
        """
        entry_id, partner_id = [None] * 2
        for k in self._KALTURA_KEYS:
            # NOTE(review): the lookup, the two guards and the `break` were not
            # visible in the reviewed excerpt and are reconstructed - confirm.
            k_url = video.get(k)
            if not k_url:
                continue
            mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
            if mobj:
                partner_id, entry_id = mobj.groups()
                break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        # Keep only categories that actually carry a non-empty name.
        category_names = (c.get('name') for c in meta_categories)
        categories = [name for name in category_names if name]

        show_info = video.get('show_info') or {}

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            # NOTE(review): 'id' and 'title' were not visible in the reviewed
            # excerpt; reconstructed on the assumption that the object name is
            # the title - confirm against the full file.
            'id': entry_id,
            'title': video.get('name'),
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
class AsianCrushIE(AsianCrushBaseIE):
    """Extractor for single video pages (``/video/.../<zero-padded id>v``)."""

    # The id group excludes the leading zeros of the numeric part.
    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
    # NOTE(review): only the expectations visible in the reviewed excerpt are
    # listed; any further info_dict keys need restoring from the full file.
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'title': 'Women Who Flirt',
            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
            'categories': 'count:5',
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve ``url`` to a video object and convert it to a result dict."""
        host, video_id = self._match_valid_url(url).groups()

        if host == 'cocoro.tv':
            # On this host the page's `iEmbedVars` object may carry an
            # `entry_id`; when present it replaces the id taken from the URL.
            webpage = self._download_webpage(url, video_id)
            raw_embed_vars = self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
                default='{}')
            embed_vars = self._parse_json(
                raw_embed_vars, video_id, fatal=False) or {}
            video_id = embed_vars.get('entry_id') or video_id

        video = self._download_object_data(host, video_id, 'video')
        return self._parse_video_data(video)
class AsianCrushPlaylistIE(AsianCrushBaseIE):
    """Extractor for series pages (``/series/<zero-padded id>s``)."""

    # The id group excludes the leading zeros of the numeric part.
    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
        'info_dict': {
            # NOTE(review): 'id' was not visible in the reviewed excerpt; this
            # is the value _VALID_URL captures from the URL above - confirm.
            'id': '6447',
            'title': 'Fruity Samurai',
            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
        'only_matching': True,
    }]
    # Sent as `max` and used to compute `start` for each page request.
    # NOTE(review): presumably chosen so one request returns every episode.
    _PAGE_SIZE = 1000000000

    def _fetch_page(self, domain, parent_id, page):
        """Yield a result dict for every video on zero-based ``page`` of ``parent_id``."""
        query = {
            'max': self._PAGE_SIZE,
            'object_type': 'video',
            'parent_id': parent_id,
            'start': page * self._PAGE_SIZE,
        }
        videos = self._call_api(
            domain, 'getreferencedobjects', parent_id, query,
            'page %d' % (page + 1))
        for video in videos:
            yield self._parse_video_data(video)

    def _real_extract(self, url):
        """Return a playlist result for the series at ``url``.

        For cocoro.tv the entries, title and description are scraped from the
        series web page; every other host is served from the API.
        """
        host, playlist_id = self._match_valid_url(url).groups()

        if host == 'cocoro.tv':
            webpage = self._download_webpage(url, playlist_id)

            # Collect links to video pages, keeping only anchors whose class
            # attribute is exactly "clearfix".
            entries = []
            for mobj in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                    webpage):
                attrs = extract_attributes(mobj.group(0))
                if attrs.get('class') == 'clearfix':
                    entries.append(self.url_result(
                        mobj.group('url'), ie=AsianCrushIE.ie_key()))

            # `[^>]+` (rather than a single `[^>]`) lets the heading carry
            # other attributes ahead of `id` and still match.
            title = self._html_search_regex(
                r'(?s)<h1\b[^>]+\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None) or self._og_search_title(
                webpage, default=None) or self._html_search_meta(
                'twitter:title', webpage, 'title',
                default=None) or self._html_extract_title(webpage)
            if title:
                # Drop everything from the first " | " separator onwards.
                title = re.sub(r'\s*\|\s*.+?$', '', title)

            description = self._og_search_description(
                webpage, default=None) or self._html_search_meta(
                'twitter:description', webpage, 'description', fatal=False)
        else:
            show = self._download_object_data(host, playlist_id, 'show')
            title = show.get('name')
            description = self._get_object_description(show)
            # Pages are fetched lazily through _fetch_page.
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, host, playlist_id),
                self._PAGE_SIZE)

        return self.playlist_result(entries, playlist_id, title, description)