]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/samplefocus.py
3 from . common
import InfoExtractor
6 get_element_by_attribute
,
11 class SampleFocusIE ( InfoExtractor
):
12 _VALID_URL
= r
'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
14 'url' : 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar' ,
15 'md5' : '48c8d62d60be467293912e0e619a5120' ,
18 'display_id' : 'lil-peep-sad-emo-guitar' ,
20 'title' : 'Lil Peep Sad Emo Guitar' ,
21 'thumbnail' : r
're:^https?://.+\.png' ,
22 'license' : 'Standard License' ,
23 'uploader' : 'CapsCtrl' ,
24 'uploader_id' : 'capsctrl' ,
27 'categories' : [ 'Samples' , 'Guitar' , 'Electric guitar' ],
30 'url' : 'https://samplefocus.com/samples/dababy-style-bass-808' ,
33 'url' : 'https://samplefocus.com/samples/young-chop-kick' ,
37 def _real_extract ( self
, url
):
38 display_id
= self
._ match
_ id
( url
)
39 webpage
= self
._ download
_ webpage
( url
, display_id
)
41 sample_id
= self
._ search
_ regex
(
42 r
'<input[^>]+id=(["\' ]) sample_id\
1 [ ^
>]+ value
=( ?
:[ " \' ])(?P<id>\d+)',
43 webpage, 'sample id', group='id')
45 title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
46 r'<h1>(.+?)</h1>', webpage, 'title')
48 mp3_url = self._search_regex(
49 r'<input[^>]+id=([" \' ]) sample_mp3\
1 [ ^
>]+ value
=([ " \' ])(?P<url>(?:(?!\2).)+)',
50 webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
51 r'<meta[^>]+itemprop=([" \' ]) contentUrl\
1 [ ^
>]*> ',
52 webpage, ' mp3 url
', group=0))[' content
']
54 thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
55 r' < img
[ ^
>]+ class =( ?
:[ " \' ])waveform responsive-img[^>]+src=([" \' ])( ?P
< url
>( ?
:( ?
! \
1 ).)+) ',
56 webpage, ' mp3
', fatal=False, group=' url
')
59 for author_id, author, body in re.findall(r' ( ?s
)< p
[ ^
>]+ class = "comment-author" >< a
[ ^
>]+ href
= "/users/([^" ]+) ">([^" ]+)</ a
>.+ ?
< p
[ ^
>]+ class = "comment-body" >([ ^
>]+)</ p
> ', webpage):
62 ' author_id
': author_id,
66 uploader_id = uploader = None
67 mobj = re.search(r' > By
< a
[ ^
>]+ href
= "/users/([^" ]+) "[^>]*>([^<]+)', webpage)
69 uploader_id, uploader = mobj.groups()
71 breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
74 for _, name in re.findall(r'<span[^>]+property=([" \' ]) name\
1 [ ^
>]*>([ ^
<]+) ', breadcrumb):
75 categories.append(name)
77 def extract_count(klass):
78 return int_or_none(self._html_search_regex(
79 r' < span
[ ^
>]+ class =( ?
:[ " \' ])? %s-cou nt[^>]*>(\d+)' % klass,
80 webpage, klass, fatal=False))
86 'display_id': display_id,
87 'thumbnail': thumbnail,
89 'license': self._html_search_regex(
90 r'<a[^>]+href=([" \' ])/ license\
1 [ ^
>]*>( ?P
< license
>[ ^
<]+)< ',
91 webpage, ' license
', fatal=False, group=' license
'),
92 ' uploader_id
': uploader_id,
93 ' like_count
': extract_count(' sample
- %s-f avorites
' % sample_id),
94 ' comment_count
': extract_count(' comments
'),
96 ' categories
': categories,