]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/yappy.py
[cleanup] Misc (#8510)
[yt-dlp.git] / yt_dlp / extractor / yappy.py
CommitLineData
36163001
H
1from .common import InfoExtractor
2from ..utils import (
6f69101d 3 OnDemandPagedList,
36163001
H
4 int_or_none,
5 traverse_obj,
6 unified_timestamp,
6f69101d 7 url_or_none,
36163001
H
8)
9
10
class YappyIE(InfoExtractor):
    # Extractor for single video pages matched by _VALID_URL below.
    _VALID_URL = r'https?://yappy\.media/video/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://yappy.media/video/47fea6d8586f48d1a0cf96a7342aabd2',
        'info_dict': {
            'id': '47fea6d8586f48d1a0cf96a7342aabd2',
            'ext': 'mp4',
            'title': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
            'timestamp': 1661893200,
            'description': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
            'thumbnail': 'https://cdn-st.ritm.media/static/pic/thumbnails/0c7c4d73388f47848acaf540d2e2bb8c-thumbnail.jpg',
            'upload_date': '20220830',
            'view_count': int,
            'like_count': int,
            'uploader_id': '59a0c8c485e5410b9c43474bf4c6a373',
            'categories': ['Образование и наука', 'Лайфхак', 'Технологии', 'Арт/искусство'],
            'repost_count': int,
            'uploader': 'YAPPY',
        }
    }, {
        'url': 'https://yappy.media/video/3862451954ad4bd58ae2ccefddb0bd33',
        'info_dict': {
            'id': '3862451954ad4bd58ae2ccefddb0bd33',
            'ext': 'mp4',
            'title': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
            'timestamp': 1674726985,
            'like_count': int,
            'description': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
            'uploader_id': '6793ee3581974a3586fc01e157de6c99',
            'view_count': int,
            'repost_count': int,
            'uploader': 'LENA SHTURMAN',
            'upload_date': '20230126',
            'thumbnail': 'https://cdn-st.ritm.media/static/pic/user_thumbnails/6e76bb4bbad640b6/9ec84c115b2b1967/1674716171.jpg',
        }
    }]

    def _real_extract(self, url):
        # Builds the info dict for one video page.
        # Metadata is read from the page's Next.js props ('data' first, then
        # 'OpenGraphParameters'); when neither yields anything, the
        # /api/video/<id> endpoint is queried instead. JSON-LD and <meta>
        # tags from the webpage fill in individual fields that are missing.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        json_ld = self._search_json_ld(webpage, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)

        media_data = (
            traverse_obj(
                nextjs_data, ('props', 'pageProps', ('data', 'OpenGraphParameters')), get_all=False)
            or self._download_json(f'https://yappy.media/api/video/{video_id}', video_id))

        watermark_suffix = '-wm.mp4'
        media_url = traverse_obj(media_data, ('link', {url_or_none})) or ''
        has_watermark = media_url.endswith(watermark_suffix)

        formats = []
        if media_url:
            formats.append({
                'url': media_url,
                'ext': 'mp4',
                'format_note': 'Watermarked' if has_watermark else None,
                # Rank the watermarked file below the variant appended next
                'preference': -10 if has_watermark else None,
            })

        if has_watermark:
            # Offer the same URL without the '-wm' marker as an extra format
            # (presumably the clean rendition). Only the trailing suffix is
            # swapped: str.replace() would also rewrite any earlier
            # '-wm.mp4' occurrence elsewhere in the URL.
            formats.append({
                'url': f'{media_url[:-len(watermark_suffix)]}.mp4',
                'ext': 'mp4',
            })

        # Validate the audio link the same way as the video link, so that a
        # non-URL value never ends up as a format URL.
        audio_link = traverse_obj(media_data, ('audio', 'link', {url_or_none}))
        if audio_link:
            formats.append({
                'url': audio_link,
                'ext': 'mp3',
                'acodec': 'mp3',
                'vcodec': 'none',
            })

        return {
            'id': video_id,
            # The JSON-LD description doubles as the title (the test cases
            # above expect title == description).
            'title': (json_ld.get('description') or self._html_search_meta(['og:title'], webpage)
                      or self._html_extract_title(webpage)),
            'formats': formats,
            'thumbnail': (media_data.get('thumbnail')
                          or self._html_search_meta(['og:image', 'og:image:secure_url'], webpage)),
            'description': (media_data.get('description') or json_ld.get('description')
                            or self._html_search_meta(['description', 'og:description'], webpage)),
            'timestamp': unified_timestamp(media_data.get('publishedAt') or json_ld.get('timestamp')),
            'view_count': int_or_none(media_data.get('viewsCount') or json_ld.get('view_count')),
            'like_count': int_or_none(media_data.get('likesCount')),
            'uploader': traverse_obj(media_data, ('creator', 'firstName')),
            # Prefer the creator's uuid; fall back to the nickname
            'uploader_id': traverse_obj(media_data, ('creator', ('uuid', 'nickname')), get_all=False),
            # Empty category lists are reported as None
            'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
            'repost_count': int_or_none(media_data.get('sharingCount')),
        }
6f69101d
V
101
102
class YappyProfileIE(InfoExtractor):
    # Extractor for profile pages; yields every listed video as a playlist entry.
    _VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
        'info_dict': {
            'id': '59a0c8c485e5410b9c43474bf4c6a373',
        },
        'playlist_mincount': 527,
    }]

    def _real_extract(self, url):
        # Returns a playlist whose entries are fetched page by page on demand.
        profile_id = self._match_id(url)

        def fetch_page(page_index):
            # The API is queried with the received index plus one
            # (presumably the paged list counts from 0 and the API from 1).
            api_page = page_index + 1
            listing = self._download_json(
                f'https://yappy.media/api/video/list/{profile_id}?page={api_page}',
                profile_id, f'Downloading profile page {api_page} JSON')

            # Only results that carry a truthy 'uuid' are turned into entries
            usable_results = traverse_obj(listing, ('results', lambda _, v: v['uuid']))
            for item in usable_results:
                video_uuid = item['uuid']
                video_url = f'https://yappy.media/video/{video_uuid}'
                yield self.url_result(video_url, YappyIE, video_uuid, item.get('description'))

        paged_entries = OnDemandPagedList(fetch_page, 15)
        return self.playlist_result(paged_entries, profile_id)