]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/digiteka.py
Fix "invalid escape sequences" error on Python 3.6
[yt-dlp.git] / youtube_dl / extractor / digiteka.py
CommitLineData
3073a6d5
S
1# coding: utf-8
2from __future__ import unicode_literals
3
b30ef07c
S
4import re
5
3073a6d5 6from .common import InfoExtractor
6aeba407 7from ..utils import int_or_none
3073a6d5
S
8
9
class DigitekaIE(InfoExtractor):
    """Extractor for videos served by digiteka.net / ultimedia.com.

    Two URL families are recognised by ``_VALID_URL``:

    * embed ("deliver") URLs, whose topic is captured as ``embed_type``
      (``generic`` or ``musique``);
    * site URLs (``default/index/video<type>/id/...``), whose topic is
      captured as ``site_type`` (``generic`` or ``music``).

    In both cases the trailing path component is captured as ``id``.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
        (?:
            deliver/
            (?P<embed_type>
                generic|
                musique
            )
            (?:/[^/]+)*/
            (?:
                src|
                article
            )|
            default/index/video
            (?P<site_type>
                generic|
                music
            )
            /id
        )/(?P<id>[\d+a-z]+)'''
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 74,
            'upload_date': '20150317',
            'timestamp': 1426604939,
            'uploader_id': '3fszv',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': 'Two - C\'est La Vie (clip)',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 233,
            'upload_date': '20150224',
            'timestamp': 1424760500,
            'uploader_id': '3rfzk',
        },
    }, {
        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Find an embedded player URL in *webpage*.

        Searches for an ``<iframe>`` or ``<script>`` tag whose ``src`` is a
        "deliver" URL and returns that URL (possibly protocol-relative, as
        the scheme is optional in the pattern). Returns ``None`` when no
        such tag is present.
        """
        # Accept the same hosts as _VALID_URL so that digiteka.net embeds
        # are detected as well as ultimedia.com ones.
        mobj = re.search(
            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        Downloads the JSON description of the video from the
        ``deliver/video`` endpoint. If that description carries a ``yt_id``
        the result is delegated to the ``Youtube`` extractor; otherwise the
        formats are taken from the first playlist entry of ``jwconf``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Only one of the two groups participates in a match, depending on
        # which URL family was used.
        video_type = mobj.group('embed_type') or mobj.group('site_type')
        # Site URLs say "music"; the deliver endpoint is queried with the
        # embed spelling "musique".
        if video_type == 'music':
            video_type = 'musique'

        deliver_info = self._download_json(
            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
            video_id)

        yt_id = deliver_info.get('yt_id')
        if yt_id:
            return self.url_result(yt_id, 'Youtube')

        jwconf = deliver_info['jwconf']

        formats = []
        for source in jwconf['playlist'][0]['sources']:
            source_url = source.get('file')
            # A source without a 'file' URL cannot be downloaded; skip it
            # instead of aborting the whole extraction with a KeyError.
            if not source_url:
                continue
            formats.append({
                'url': source_url,
                'format_id': source.get('label'),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': deliver_info['title'],
            'thumbnail': jwconf.get('image'),
            'duration': int_or_none(deliver_info.get('duration')),
            'timestamp': int_or_none(deliver_info.get('release_time')),
            'uploader_id': deliver_info.get('owner_id'),
            'formats': formats,
        }