]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/samplefocus.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
9 get_element_by_attribute
,
14 class SampleFocusIE ( InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
17 'url' : 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar' ,
18 'md5' : '48c8d62d60be467293912e0e619a5120' ,
21 'display_id' : 'lil-peep-sad-emo-guitar' ,
23 'title' : 'Lil Peep Sad Emo Guitar' ,
24 'thumbnail' : r
're:^https?://.+\.png' ,
25 'license' : 'Standard License' ,
26 'uploader' : 'CapsCtrl' ,
27 'uploader_id' : 'capsctrl' ,
30 'categories' : [ 'Samples' , 'Guitar' , 'Electric guitar' ],
33 'url' : 'https://samplefocus.com/samples/dababy-style-bass-808' ,
36 'url' : 'https://samplefocus.com/samples/young-chop-kick' ,
40 def _real_extract ( self
, url
):
41 display_id
= self
._ match
_ id
( url
)
42 webpage
= self
._ download
_ webpage
( url
, display_id
)
44 sample_id
= self
._ search
_ regex
(
45 r
'<input[^>]+id=(["\' ]) sample_id\
1 [ ^
>]+ value
=( ?
:[ " \' ])(?P<id>\d+)',
46 webpage, 'sample id', group='id')
48 title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
49 r'<h1>(.+?)</h1>', webpage, 'title')
51 mp3_url = self._search_regex(
52 r'<input[^>]+id=([" \' ]) sample_mp3\
1 [ ^
>]+ value
=([ " \' ])(?P<url>(?:(?!\2).)+)',
53 webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
54 r'<meta[^>]+itemprop=([" \' ]) contentUrl\
1 [ ^
>]*> ',
55 webpage, ' mp3 url
', group=0))[' content
']
57 thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
58 r' < img
[ ^
>]+ class =( ?
:[ " \' ])waveform responsive-img[^>]+src=([" \' ])( ?P
< url
>( ?
:( ?
! \
1 ).)+) ',
59 webpage, ' mp3
', fatal=False, group=' url
')
62 for author_id, author, body in re.findall(r' ( ?s
)< p
[ ^
>]+ class = "comment-author" >< a
[ ^
>]+ href
= "/users/([^" ]+) ">([^" ]+)</ a
>.+ ?
< p
[ ^
>]+ class = "comment-body" >([ ^
>]+)</ p
> ', webpage):
65 ' author_id
': author_id,
69 uploader_id = uploader = None
70 mobj = re.search(r' > By
< a
[ ^
>]+ href
= "/users/([^" ]+) "[^>]*>([^<]+)', webpage)
72 uploader_id, uploader = mobj.groups()
74 breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
77 for _, name in re.findall(r'<span[^>]+property=([" \' ]) name\
1 [ ^
>]*>([ ^
<]+) ', breadcrumb):
78 categories.append(name)
80 def extract_count(klass):
81 return int_or_none(self._html_search_regex(
82 r' < span
[ ^
>]+ class =( ?
:[ " \' ])? %s-cou nt[^>]*>(\d+)' % klass,
83 webpage, klass, fatal=False))
89 'display_id': display_id,
90 'thumbnail': thumbnail,
92 'license': self._html_search_regex(
93 r'<a[^>]+href=([" \' ])/ license\
1 [ ^
>]*>( ?P
< license
>[ ^
<]+)< ',
94 webpage, ' license
', fatal=False, group=' license
'),
95 ' uploader_id
': uploader_id,
96 ' like_count
': extract_count(' sample
- %s-f avorites
' % sample_id),
97 ' comment_count
': extract_count(' comments
'),
99 ' categories
': categories,