[yt-dlp.git] / yt_dlp / extractor / franceinter.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import month_by_name


class FranceInterIE(InfoExtractor):
    """Extractor for pages under ``franceinter.fr/emissions/``.

    The media URL is read from the ``data-url`` attribute of a ``<button>``
    that follows the ``page-diffusion`` ``<div>``; title, description and
    thumbnail come from the page's meta tags.  The single format is
    declared with ``vcodec: 'none'``.
    """

    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
        'info_dict': {
            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a France Inter programme page.

        :param url: page URL matching ``_VALID_URL``; its ``id`` group is
            used as the video id.
        :return: dict with ``id``, ``title``, ``description``,
            ``thumbnail``, ``upload_date`` (``YYYYMMDD`` string or ``None``)
            and a one-element ``formats`` list.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The quote character opening data-url is captured as group 1 so the
        # value is read up to the matching closing quote.
        video_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', group='url')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

        # The date is optional metadata: the lookup is non-fatal and any
        # value that cannot be turned into a valid date is dropped.
        upload_date = None
        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False)
        if upload_date_str:
            # Expected shape is "<day> <French month name> <year>".
            date_parts = upload_date_str.split()
            if len(date_parts) == 3:
                day, month_name, year = date_parts
                month = month_by_name(month_name, lang='fr')
                # An unrecognised month name must not be rendered as month
                # "00" (e.g. "20160007"), which is not a valid date.
                if month:
                    upload_date = '%s%02d%02d' % (year, month, int(day))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
                'vcodec': 'none',
            }],
        }
Commit	Line	Data
677b3ce8	1	# coding: utf-8
c8650f7e	2	from __future__ import unicode_literals
c8650f7e	3
bf6705f5	4	from .common import InfoExtractor
0002962f	5	from ..utils import month_by_name
677b3ce8 PH	6
677b3ce8 PH	7
bf6705f5	8	class FranceInterIE(InfoExtractor):
a942d6cb	9	_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
a942d6cb	10
677b3ce8	11	_TEST = {
c51a7f0b S	12	'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
c51a7f0b S	13	'md5': '9e54d7bdb6fdc02a841007f8a975c094',
611c1dd9	14	'info_dict': {
c51a7f0b	15	'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
532f5bff	16	'ext': 'mp3',
c51a7f0b S	17	'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
c51a7f0b S	18	'description': 'md5:401969c5d318c061f86bda1fa359292b',
70c5802b	19	'thumbnail': r're:^https?://.*\.jpg',
c51a7f0b	20	'upload_date': '20160907',
677b3ce8 PH	21	},
677b3ce8 PH	22	}
bf6705f5	23
677b3ce8	24	def _real_extract(self, url):
2db58069	25	video_id = self._match_id(url)
26844eb5	26
677b3ce8	27	webpage = self._download_webpage(url, video_id)
532f5bff	28
a942d6cb	29	video_url = self._search_regex(
0002962f S	30	r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
0002962f S	31	webpage, 'video url', group='url')
a942d6cb	32
	33	title = self._og_search_title(webpage)
	34	description = self._og_search_description(webpage)
70c5802b	35	thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
a942d6cb	36
0002962f	37	upload_date_str = self._search_regex(
b2a027fc	38	r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
0002962f S	39	webpage, 'upload date', fatal=False)
	40	if upload_date_str:
	41	upload_date_list = upload_date_str.split()
	42	upload_date_list.reverse()
52dc8a9b	43	upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
c51a7f0b	44	upload_date_list[2] = '%02d' % int(upload_date_list[2])
0002962f S	45	upload_date = ''.join(upload_date_list)
	46	else:
	47	upload_date = None
532f5bff	48
677b3ce8 PH	49	return {
677b3ce8 PH	50	'id': video_id,
532f5bff S	51	'title': title,
532f5bff S	52	'description': description,
70c5802b	53	'thumbnail': thumbnail,
0002962f	54	'upload_date': upload_date,
677b3ce8 PH	55	'formats': [{
	56	'url': video_url,
	57	'vcodec': 'none',
	58	}],
677b3ce8	59	}