]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
ae84f879 1#!/usr/bin/env python
a0f59cdc 2from __future__ import unicode_literals
ae84f879
JMF
3
4# Allow direct execution
5import os
6import sys
7import unittest
8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
d0efb9ec 10from test.helper import FakeYDL, md5
ae84f879
JMF
11
12
7a5c1cfe 13from yt_dlp.extractor import (
ae84f879
JMF
14 YoutubeIE,
15 DailymotionIE,
16 TEDIE,
4edff4cf 17 VimeoIE,
7bc8780c 18 WallaIE,
0b54a5b1 19 CeskaTelevizeIE,
311c3938 20 LyndaIE,
b9b42f2e 21 NPOIE,
0af25f78 22 ComedyCentralIE,
01561da1 23 NRKTVIE,
c4d6fc6d 24 RaiPlayIE,
4f7cea6c 25 VikiIE,
8807f127 26 ThePlatformIE,
f908b74f 27 ThePlatformFeedIE,
25ac63ed 28 RTVEALaCartaIE,
66d041f2 29 DemocracynowIE,
ae84f879
JMF
30)
31
32
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``url`` (the value handed to ``extract_info``) and ``IE``
    (the extractor class registered on the fake downloader).
    """

    url = None
    IE = None

    def setUp(self):
        """Create a fresh ``FakeYDL`` and register an ``IE`` instance on it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the info dict for ``self.url`` extracted with ``download=False``."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` dict for ``self.url``.

        Entries of ``requested_subtitles`` that carry no ``data`` are fetched
        from their ``url`` and decoded as UTF-8.  A falsy
        ``requested_subtitles`` value is returned unchanged.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode('utf-8')
                finally:
                    # Release the handle even when reading or decoding fails.
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
ae84f879
JMF
56
57
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``YoutubeIE``."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        """All tracks are returned; 'en' and 'it' are pinned by MD5."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
        self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
        for lang in ['fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_youtube_subtitles_ttml_format(self):
        """Requesting the 'ttml' format yields the expected 'en' track."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'ttml'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        """Requesting the 'vtt' format yields the expected 'en' track."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_automatic_captions(self):
        """Automatic captions are returned for the requested language."""
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_no_automatic_captions(self):
        """Nothing is returned when only automatic captions are requested."""
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writeautomaticsub'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'i0ZabxXmH4Y'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_nosubtitles(self):
        """A video without subtitles produces a warning and a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 112
ae84f879
JMF
113
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``DailymotionIE``."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        """At least six tracks are returned; 'en' and 'fr' are pinned by MD5."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        """A video without subtitles produces a warning and a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 135
ae84f879
JMF
136
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``TEDIE``."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_allsubtitles(self):
        """At least 28 tracks are returned; 'en' and 'fr' are pinned by MD5."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)
ae84f879 150
b4bcffef 151
4edff4cf
S
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``VimeoIE``."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        """Exactly the de/en/es/fr tracks come back; 'en' and 'fr' are pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'de', 'en', 'es', 'fr'})
        self.assertEqual(md5(subs['en']), '8062383cf4dec168fc40a088aa6d5888')
        self.assertEqual(md5(subs['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_nosubtitles(self):
        """A video without subtitles produces a warning and a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        self.assertFalse(self.getSubtitles())
4edff4cf 171
4edff4cf 172
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``WallaIE``."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        """Only the 'heb' track is returned, pinned by MD5."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'heb'})
        self.assertEqual(md5(subs['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        """A video without subtitles produces a warning and a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        self.assertFalse(self.getSubtitles())
7bc8780c
S
192
193
0b54a5b1
S
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``CeskaTelevizeIE``."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        """Only the 'cs' track is returned and it is longer than 20000 characters."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles), {'cs'})
        # assertGreater reports the actual length on failure.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        """A video without subtitles produces a warning and a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
213
214
311c3938
JMF
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``LyndaIE``."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '09bbe67222259bed60deaa26997d73a7')
225
226
b9b42f2e
JMF
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``NPOIE``."""

    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        """Only the 'nl' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'nl'})
        self.assertEqual(md5(subs['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
237
238
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks run against ``ComedyCentralIE``."""

    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first element of the extracted result's ``entries``."""
        result = super(TestMTVSubtitles, self).getInfoDict()
        return result['entries'][0]

    def test_allsubtitles(self):
        """Only the 'en' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
252
253
01561da1
JMF
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``NRKTVIE``."""

    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        """Only the 'no' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'no'})
        self.assertEqual(md5(subs['no']), '544fa917d3197fcbee64634559221cc2')
01561da1
JMF
264
265
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``RaiPlayIE``; each test sets its own URL."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        """Only the 'it' track is returned for the 2014 video, pinned by MD5."""
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'it'})
        self.assertEqual(md5(subs['it']), 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        """Only the 'it' track is returned for the 2020 video, pinned by MD5."""
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'it'})
        self.assertEqual(md5(subs['it']), '4b3264186fbb103508abe5311cfcb9cd')
284
afbdd3ac 285
4f7cea6c
JMF
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``VikiIE``."""

    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
296
297
8807f127
JMF
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``ThePlatformIE``."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
310
311
f908b74f
YCH
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``ThePlatformFeedIE``."""

    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '48649a22e82b2da21c9a67a395eedade')
322
323
25ac63ed
JMF
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``RTVEALaCartaIE``."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Marked as skipped so the runner reports it as such instead of counting
    # an early ``return`` as a pass; the assertions stay reachable once the
    # decorator is removed.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        """Only the 'es' track is returned, pinned by MD5."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
336
337
66d041f2
YCH
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ``DemocracynowIE``."""

    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned for the show URL, pinned by MD5."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_subtitles_in_page(self):
        """The story-page URL yields the same single 'en' track (same MD5)."""
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), 'acaca989e24a9e45a6719c9b3d60815c')
356
357
ae84f879
JMF
# Run the test cases in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()