]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/ina.py
[ina] improve extraction
[yt-dlp.git] / youtube_dl / extractor / ina.py
CommitLineData
dcdb292f 1# coding: utf-8
de563c9d
JMF
2from __future__ import unicode_literals
3
9fe4de34 4from .common import InfoExtractor
b27a71e6
RA
5from ..utils import (
6 int_or_none,
7 strip_or_none,
8 xpath_attr,
9 xpath_text,
10)
9fe4de34
PH
11
12
class InaIE(InfoExtractor):
    # Matches ina.fr video page URLs; the run of upper-case letters, digits
    # and underscores that follows /video/ is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>[A-Z0-9_]+)'
    _TESTS = [{
        'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
        'info_dict': {
            'id': 'I12055569',
            'ext': 'mp4',
            'title': 'François Hollande "Je crois que c\'est clair"',
            'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
        }
    }, {
        'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The MRSS notice for the video id is downloaded from player.ina.fr and
        its first ``channel/item`` element supplies the title, description,
        stream URLs and thumbnails.
        """
        video_id = self._match_id(url)
        notice_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        info_doc = self._download_xml(notice_url, video_id)
        item = info_doc.find('channel/item')
        # The title is mandatory: fatal=True is passed so xpath_text does not
        # silently hand back a missing value.
        title = xpath_text(item, 'title', fatal=True)

        def media_tag(name):
            # Qualify a tag name with the Media RSS namespace.
            return self._xpath_ns(name, 'http://search.yahoo.com/mrss/')

        content = item.find(media_tag('content'))

        def stream_url(name):
            # 'url' attribute of the named media:* child of <media:content>.
            return xpath_attr(content, media_tag(name), 'url')

        # (format id / child tag name, width, height) for each known rendition.
        renditions = (
            ('bq', 400, 300),
            ('mq', 512, 384),
            ('hq', 768, 576),
        )
        formats = []
        for format_id, width, height in renditions:
            candidate = stream_url(format_id)
            if candidate:
                formats.append({
                    'format_id': format_id,
                    'url': candidate,
                    'width': width,
                    'height': height,
                })
        if not formats:
            # None of the quality-specific children carried a URL: fall back
            # to the 'player' child, then to <media:content>'s own url.
            formats = [{
                'url': stream_url('player') or content.attrib['url'],
            }]

        # Thumbnails without a url attribute are skipped.
        thumbnails = [
            {
                'url': node.get('url'),
                'height': int_or_none(node.get('height')),
                'width': int_or_none(node.get('width')),
            }
            for node in content.findall(media_tag('thumbnail'))
            if node.get('url')
        ]

        description = strip_or_none(xpath_text(item, 'description'))
        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
        }