]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
[cleanup] Mark some compat variables for removal (#2173)
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
a0f59cdc 2from __future__ import unicode_literals
ae84f879
JMF
3
4# Allow direct execution
5import os
6import sys
7import unittest
8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
060ac762 10from test.helper import FakeYDL, md5, is_download_test
ae84f879
JMF
11
12
7a5c1cfe 13from yt_dlp.extractor import (
ae84f879
JMF
14 YoutubeIE,
15 DailymotionIE,
4259402c 16 TedTalkIE,
4edff4cf 17 VimeoIE,
7bc8780c 18 WallaIE,
0b54a5b1 19 CeskaTelevizeIE,
311c3938 20 LyndaIE,
b9b42f2e 21 NPOIE,
a7e999be 22 PBSIE,
0af25f78 23 ComedyCentralIE,
01561da1 24 NRKTVIE,
c4d6fc6d 25 RaiPlayIE,
4f7cea6c 26 VikiIE,
8807f127 27 ThePlatformIE,
f908b74f 28 ThePlatformFeedIE,
25ac63ed 29 RTVEALaCartaIE,
66d041f2 30 DemocracynowIE,
ae84f879
JMF
31)
32
33
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``url`` and ``IE``; ``setUp`` then creates a ``FakeYDL``
    and registers an instance of ``IE`` on it.
    """

    url = None  # value passed to extract_info by getInfoDict
    IE = None  # extractor class instantiated in setUp

    def setUp(self):
        """Create a fresh downloader and register the extractor under test."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` (download=False)."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` mapping.

        The mapping is built from the ``'requested_subtitles'`` entry of the
        info dict. If that entry is falsy it is returned unchanged. Entries
        whose ``'data'`` is ``None`` are fetched from their ``'url'`` and
        decoded as UTF-8; the fetched text is stored back on the entry.
        """
        requested = self.getInfoDict()['requested_subtitles']
        if not requested:
            return requested
        for entry in requested.values():
            if entry.get('data') is None:
                response = self.DL.urlopen(entry['url'])
                entry['data'] = response.read().decode('utf-8')
        return {lang: entry['data'] for lang, entry in requested.items()}
ae84f879
JMF
58
59
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption tests for YoutubeIE."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        # All tracks requested: check the count, two known hashes and
        # that the remaining spot-checked languages are present.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
        self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
        for lang in ['fr', 'de']:
            # assertIsNotNone reports the failure more precisely than
            # assertTrue(... is not None).
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_youtube_subtitles_ttml_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'ttml'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_no_automatic_captions(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writeautomaticsub'] = True
        subtitles = self.getSubtitles()
        # Same check as assertTrue(not subtitles), spelled like the other
        # "no subtitles" tests in this file.
        self.assertFalse(subtitles)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'i0ZabxXmH4Y'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 115
ae84f879 116
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual shows the actual count when the check fails.
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 139
ae84f879 140
@is_download_test
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for TedTalkIE."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual shows the actual count when the check fails.
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)
ae84f879 155
b4bcffef 156
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for VimeoIE."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        # Request every track, then check the language set and two hashes.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'de', 'en', 'es', 'fr'})
        self.assertEqual(md5(subs['en']), '8062383cf4dec168fc40a088aa6d5888')
        self.assertEqual(md5(subs['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_nosubtitles(self):
        # A warning is expected and nothing should come back for this video.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertFalse(subs)
4edff4cf 177
4edff4cf 178
@is_download_test
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for WallaIE."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        # Only a Hebrew track is expected for the default video.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'heb'})
        self.assertEqual(md5(subs['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertFalse(subs)
7bc8780c
S
199
200
@is_download_test
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for CeskaTelevizeIE."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # The test checks a minimum length rather than a hash;
        # assertGreater shows the actual length when the check fails.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
221
222
@is_download_test
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for LyndaIE."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Only an English track is expected; its content is pinned by hash.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), '09bbe67222259bed60deaa26997d73a7')
234
235
@is_download_test
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for NPOIE."""

    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        # Only a Dutch track is expected; its content is pinned by hash.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'nl'})
        self.assertEqual(md5(subs['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
247
248
@is_download_test
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ComedyCentralIE."""

    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first item of the extracted info dict's ``'entries'``."""
        extracted = super(TestMTVSubtitles, self).getInfoDict()
        return extracted['entries'][0]

    def test_allsubtitles(self):
        # Only an English track is expected; its content is pinned by hash.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
263
264
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for NRKTVIE."""

    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        # Only a 'no' track is expected; its content is pinned by hash.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'no'})
        self.assertEqual(md5(subs['no']), '544fa917d3197fcbee64634559221cc2')
01561da1
JMF
276
277
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for RaiPlayIE (each test sets its own URL)."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'it'})
        self.assertEqual(md5(subs['it']), 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'it'})
        self.assertEqual(md5(subs['it']), '4b3264186fbb103508abe5311cfcb9cd')
297
afbdd3ac 298
@is_download_test
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for VikiIE."""

    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        # Only an English track is expected; its content is pinned by hash.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
310
311
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ThePlatformIE."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        # Only an English track is expected; its content is pinned by hash.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
325
326
@is_download_test
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ThePlatformFeedIE."""

    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        # Only an English track is expected; its content is pinned by hash.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), '48649a22e82b2da21c9a67a395eedade')
338
339
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for RTVEALaCartaIE."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Skipped through unittest so the runner reports it as skipped. The
    # previous print-and-return left the assertions unreachable and made
    # the test count as passed.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
353
354
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DemocracynowIE."""

    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        # Default URL: a single English track with a pinned hash.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_subtitles_in_page(self):
        # A different URL that is expected to give the same English track.
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})
        self.assertEqual(md5(subs['en']), 'acaca989e24a9e45a6719c9b3d60815c')
374
375
@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for PBSIE, one per requested format."""

    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'
    IE = PBSIE

    def test_allsubtitles(self):
        # Only the set of languages is checked here, not the content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs.keys()), {'en'})

    def test_subtitles_dfxp_format(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'dfxp'
        subs = self.getSubtitles()
        accepted = ['643b034254cdc3768ff1e750b6b5873b']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_vtt_format(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'vtt'
        subs = self.getSubtitles()
        # Either of two hashes is accepted for the vtt track.
        accepted = ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_srt_format(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'srt'
        subs = self.getSubtitles()
        accepted = ['2082c21b43759d9bf172931b2f2ca371']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_sami_format(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'sami'
        subs = self.getSubtitles()
        accepted = ['4256b16ac7da6a6780fafd04294e85cd']
        self.assertIn(md5(subs['en']), accepted)
411
412
ae84f879
JMF
# Run the test cases when this file is executed directly.
if __name__ == '__main__':
    unittest.main()