4 from .common
import InfoExtractor
class WykopBaseExtractor(InfoExtractor):
    """Shared plumbing for the wykop.pl extractors.

    Provides anonymous bearer-token handling (with caching), a thin wrapper
    around the ``https://wykop.pl/api/v3/`` endpoints, and extraction of the
    metadata fields that every wykop content type has in common.
    """

    def _get_token(self, force_refresh=False):
        """Return a bearer token for the API.

        The token stored in the cache under ``('wykop', 'bearer')`` is reused
        unless *force_refresh* is true or nothing is cached. Otherwise a new
        anonymous token is requested from the ``auth`` endpoint, written to
        the cache and returned.
        """
        if not force_refresh:
            maybe_cached = self.cache.load('wykop', 'bearer')
            if maybe_cached:
                return maybe_cached

        new_token = traverse_obj(
            self._do_call_api('auth', None, 'Downloading anonymous auth token', data={
                # hardcoded in frontend
                'key': 'w53947240748',
                'secret': 'd537d9e0a7adc1510842059ae5316419',
            }), ('data', 'token'))

        self.cache.store('wykop', 'bearer', new_token)
        return new_token

    def _do_call_api(self, path, video_id, note='Downloading JSON metadata', data=None, headers=None):
        """Perform a single API request and return the decoded JSON.

        *path* is appended to ``https://wykop.pl/api/v3/``. When *data* is
        given it is wrapped as ``{'data': data}``, JSON-encoded and sent with
        a JSON ``Content-Type`` header; without *data* no body is sent.
        """
        # Work on a private copy so neither the caller's dict nor a shared
        # default is ever mutated between calls.
        headers = dict(headers or {})
        if data:
            data = json.dumps({'data': data}).encode()
            headers['Content-Type'] = 'application/json'

        return self._download_json(
            f'https://wykop.pl/api/v3/{path}', video_id,
            note=note, data=data, headers=headers)

    def _call_api(self, path, video_id, note='Downloading JSON metadata'):
        """Call an authenticated API endpoint and return the decoded JSON.

        If the first attempt fails with HTTP 403, the token is refreshed and
        the request is retried once; any other failure (or a second failure)
        is re-raised unchanged.
        """
        token = self._get_token()
        for retrying in range(2):
            try:
                return self._do_call_api(
                    path, video_id, note, headers={'Authorization': f'Bearer {token}'})
            except ExtractorError as e:
                if not retrying and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
                    # the token we used was rejected: fetch a fresh one and try again
                    token = self._get_token(True)
                    continue
                raise

    def _common_data_extract(self, data):
        """Build the info-dict fields shared by all wykop content types.

        *data* is the ``data`` object of an API response. The result is a
        ``url_transparent`` entry pointing at the embedded/linked media URL.
        """
        author = traverse_obj(data, ('author', 'username'), expected_type=str)

        return {
            '_type': 'url_transparent',
            'display_id': data.get('slug'),
            'url': traverse_obj(data,
                                ('media', 'embed', 'url'),  # what gets an iframe embed
                                ('source', 'url'),  # clickable url (dig only)
                                expected_type=url_or_none),
            'thumbnail': traverse_obj(
                data, ('media', 'photo', 'url'), ('media', 'embed', 'thumbnail'), expected_type=url_or_none),
            # the test cases in this file expect uploader == uploader_id (the author's username)
            'uploader': author,
            'uploader_id': author,
            'uploader_url': format_field(author, None, 'https://wykop.pl/ludzie/%s'),
            'timestamp': parse_iso8601(data.get('created_at'), delimiter=' '),  # time it got submitted
            'like_count': traverse_obj(data, ('votes', 'up'), expected_type=int),
            'dislike_count': traverse_obj(data, ('votes', 'down'), expected_type=int),
            'comment_count': traverse_obj(data, ('comments', 'count'), expected_type=int),
            'age_limit': 18 if data.get('adult') else 0,
            'tags': data.get('tags'),
        }
class WykopDigIE(WykopBaseExtractor):
    """Extractor for wykop.pl link ("dig") pages: ``/link/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/link/6912923/najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
        'info_dict': {
            'title': 'Najbardziej zrzędliwy kot na świecie I Frozen Planet II I BBC Earth',
            'display_id': 'najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
            'description': 'md5:ac0f87dea1cdcb6b0c53f3612a095c87',
            'tags': ['zwierzaczki', 'koty', 'smiesznykotek', 'humor', 'rozrywka', 'ciekawostki'],
            'timestamp': 1669154480,
            'release_timestamp': 1669194241,
            'release_date': '20221123',
            'uploader': 'starnak',
            'uploader_id': 'starnak',
            'uploader_url': 'https://wykop.pl/ludzie/starnak',
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'channel': 'BBC Earth',
            'channel_id': 'UCwmZiChSryoWQCZMIQezgTg',
            'channel_url': 'https://www.youtube.com/channel/UCwmZiChSryoWQCZMIQezgTg',
            'categories': ['Pets & Animals'],
            'upload_date': '20220923',
            'channel_follower_count': int,
            'availability': 'public',
            'live_status': 'not_live',
            'playable_in_embed': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Match link URLs, deferring comment URLs to WykopDigCommentIE."""
        # A comment URL also starts with /link/<id>, so it would match
        # _VALID_URL here; let the more specific extractor claim it.
        return cls._match_valid_url(url) and not WykopDigCommentIE.suitable(url)

    def _real_extract(self, url):
        """Fetch ``links/<id>`` from the API and return the info dict."""
        video_id = self._match_id(url)
        data = self._call_api(f'links/{video_id}', video_id)['data']

        return {
            **self._common_data_extract(data),
            'title': data['title'],
            'description': data.get('description'),
            # time it got "digged" to the homepage
            'release_timestamp': parse_iso8601(data.get('published_at'), delimiter=' '),
        }
class WykopDigCommentIE(WykopBaseExtractor):
    """Extractor for a comment under a wykop.pl link: ``/link/<dig_id>/<slug>/komentarz/<id>``."""

    IE_NAME = 'wykop:dig:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<dig_id>\d+)/[^/]+/komentarz/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/link/6992589/strollowal-oszusta-przez-ponad-24-minuty-udawal-naiwniaka-i-nagral-rozmowe/komentarz/114540527/podobna-sytuacja-ponizej-ciekawa-dyskusja-z-oszustem-na-sam-koniec-sam-bylem-w-biurze-swiadkiem-podobnej-rozmowy-niemal-zakonczonej-sukcesem-bandyty-g',
        'info_dict': {
            'title': 'md5:e7c741c5baa7ed6478000caf72865577',
            'display_id': 'md5:45b2d12bd0e262d09cc7cf7abc8412db',
            'description': 'md5:bcec7983429f9c0630f9deb9d3d1ba5e',
            'timestamp': 1674476945,
            'uploader': 'Bartholomew',
            'uploader_id': 'Bartholomew',
            'uploader_url': 'https://wykop.pl/ludzie/Bartholomew',
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'availability': 'public',
            'upload_date': '20230117',
            'categories': ['Entertainment'],
            'dislike_count': int,
            'comment_count': int,
            'channel_follower_count': int,
            'playable_in_embed': True,
            'live_status': 'not_live',
            'chapters': 'count:3',
            'channel': 'Poszukiwacze Okazji',
            'channel_id': 'UCzzvJDZThwv06dR4xmzrZBw',
            'channel_url': 'https://www.youtube.com/channel/UCzzvJDZThwv06dR4xmzrZBw',
        },
    }]

    def _real_extract(self, url):
        """Fetch ``links/<dig_id>/comments/<id>`` and return the info dict.

        The title is built as ``"<author username> - <comment content>"``.
        """
        dig_id, comment_id = self._search_regex(
            self._VALID_URL, url, 'dig and comment ids', group=('dig_id', 'id'))
        data = self._call_api(f'links/{dig_id}/comments/{comment_id}', comment_id)['data']

        return {
            **self._common_data_extract(data),
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }
class WykopPostIE(WykopBaseExtractor):
    """Extractor for wykop.pl microblog posts: ``/wpis/<id>``."""

    IE_NAME = 'wykop:post'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/wpis/68893343/kot-koty-smiesznykotek',
        'info_dict': {
            'id': 'PL8JMjiUPHUhwc9ZlKa_5IFeBwBV8Xe7jI',
            'title': 'PawelW124 - #kot #koty #smiesznykotek',
            'description': '#kot #koty #smiesznykotek',
            'display_id': 'kot-koty-smiesznykotek',
            'tags': ['kot', 'koty', 'smiesznykotek'],
            'uploader': 'PawelW124',
            'uploader_id': 'PawelW124',
            'uploader_url': 'https://wykop.pl/ludzie/PawelW124',
            'timestamp': 1668938142,
            'dislike_count': int,
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'comment_count': int,
            'channel_id': 'UCW9T_-uZoiI7ROARQdTDyOw',
            'channel_url': 'https://www.youtube.com/channel/UCW9T_-uZoiI7ROARQdTDyOw',
            'upload_date': '20221120',
            'modified_date': '20220814',
            'availability': 'public',
        },
        'playlist_mincount': 15,
        # flat_playlist is a download option, not an expected info field
        'params': {
            'flat_playlist': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Match post URLs, deferring post-comment URLs to WykopPostCommentIE."""
        # A post-comment URL also starts with /wpis/<id>, so it would match
        # _VALID_URL here; let the more specific extractor claim it.
        return cls._match_valid_url(url) and not WykopPostCommentIE.suitable(url)

    def _real_extract(self, url):
        """Fetch ``entries/<id>`` from the API and return the info dict.

        The title is built as ``"<author username> - <post content>"``.
        """
        video_id = self._match_id(url)
        data = self._call_api(f'entries/{video_id}', video_id)['data']

        return {
            **self._common_data_extract(data),
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }
class WykopPostCommentIE(WykopBaseExtractor):
    """Extractor for a comment under a wykop.pl post: ``/wpis/<post_id>/<slug>#<id>``."""

    IE_NAME = 'wykop:post:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<post_id>\d+)/[^/#]+#(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/wpis/70084873/test-test-test#249303979',
        'info_dict': {
            'id': 'confusedquickarmyant',
            'title': 'tpap - treść komentarza',
            'display_id': 'tresc-komentarza',
            'description': 'treść komentarza',
            'uploader_id': 'tpap',
            'uploader_url': 'https://wykop.pl/ludzie/tpap',
            'timestamp': 1675349470,
            'upload_date': '20230202',
            'dislike_count': int,
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
        },
    }]

    def _real_extract(self, url):
        """Fetch ``entries/<post_id>/comments/<id>`` and return the info dict.

        The title is built as ``"<author username> - <comment content>"``.
        """
        post_id, comment_id = self._search_regex(
            self._VALID_URL, url, 'post and comment ids', group=('post_id', 'id'))
        data = self._call_api(f'entries/{post_id}/comments/{comment_id}', comment_id)['data']

        return {
            **self._common_data_extract(data),
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }