# Source: yt-dlp.git (gitweb blob view on jfr.im) - yt_dlp/extractor/piapro.py
1 from .common
import InfoExtractor
2 from ..compat
import compat_urlparse
15 class PiaproIE(InfoExtractor
):
16 _NETRC_MACHINE
= 'piapro'
17 _VALID_URL
= r
'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
19 'url': 'https://piapro.jp/t/NXYR',
20 'md5': 'f7c0f760913fb1d44a1c45a4af793909',
25 'uploader_id': 'wowaka',
27 'description': 'http://www.nicovideo.jp/watch/sm8082467',
29 'timestamp': 1251785475,
30 'thumbnail': r
're:^https?://.*\.(?:png|jpg)$',
31 'upload_date': '20090901',
35 'note': 'There are break lines in description, mandating (?s) flag',
36 'url': 'https://piapro.jp/t/9cSd',
37 'md5': '952bb6d1e8de95050206408a87790676',
41 'title': '青に溶けた風船 / 初音ミク',
42 'description': 'md5:d395a9bd151447631a5a1460bc7f9132',
45 'timestamp': 1644030039,
46 'upload_date': '20220205',
48 'thumbnail': r
're:^https?://.*\.(?:png|jpg)$',
49 'uploader_id': 'cyankino',
52 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
55 'url': 'https://piapro.jp/t/-SO-',
61 def _perform_login(self
, username
, password
):
64 '_username': username
,
65 '_password': password
,
69 self
._request
_webpage
('https://piapro.jp/login/', None)
70 urlh
= self
._request
_webpage
(
71 'https://piapro.jp/login/exe', None,
72 note
='Logging in', errnote
='Unable to log in',
73 data
=urlencode_postdata(login_form_strs
))
77 parts
= compat_urlparse
.urlparse(urlh
.url
)
82 'unable to log in: bad username or password')
83 self
._login
_status
= login_ok
85 def _real_extract(self
, url
):
86 video_id
= self
._match
_id
(url
)
87 webpage
= self
._download
_webpage
(url
, video_id
)
89 category_id
= self
._search
_regex
(r
'categoryId=(.+)">', webpage
, 'category ID')
90 if category_id
not in ('1', '2', '21', '22', '23', '24', '25'):
91 raise ExtractorError('The URL does not contain audio.', expected
=True)
def extract_info(name, description):
    """Return the raw text of the ``<p>`` metadata field labelled *name*.

    Searches ``webpage`` (taken from the enclosing scope) for *name*,
    a colon separator, and a value made only of digits, whitespace, commas,
    colons and slashes that is terminated by ``</p>``.

    *name* is interpolated into the pattern as-is, so it must already be
    regex-safe. *description* is handed to ``_search_regex`` as the field's
    display name. ``default=None`` is passed through, so a missing field
    is presumably reported as ``None`` rather than as an error (per
    ``_search_regex``'s contract - confirm against the base class).
    """
    # Accept both the ASCII colon and the fullwidth colon (U+FF1A) after the
    # label. The fullwidth form is written as a regex escape rather than a
    # literal so that Unicode compatibility normalisation of this file cannot
    # fold it into a second, redundant ASCII colon.
    return self._search_regex(
        rf'{name}[:\uFF1A]\s*([\d\s,:/]+)\s*</p>',
        webpage, description, default=None)
98 'title': clean_html(get_element_by_class('contents_title', webpage
)),
99 'description': clean_html(get_element_by_class('contents_description', webpage
)),
100 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage
)),
101 'uploader_id': self
._search
_regex
(
102 r
'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage
), 'uploader id', default
=None),
103 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False),
104 'duration': parse_duration(extract_info('長さ', 'duration')),
105 'view_count': str_to_int(extract_info('閲覧数', 'view count')),
106 'thumbnail': self
._html
_search
_meta
('twitter:image', webpage
),
107 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')),
108 'url': self
._search
_regex
(r
'\"url\":\s*\"(.*?)\"', webpage
, 'url'),