[yt-dlp.git] / yt_dlp / extractor / theintercept.py

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
)


class TheInterceptIE(InfoExtractor):
    # Matches theintercept.com "fieldofvision" pages; the path segment that
    # follows "fieldofvision/" is captured as the ``id`` group.
    _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
        'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
        'info_dict': {
            'id': '46214',
            'ext': 'mp4',
            'title': '#ThisIsACoup – Episode Four: Surrender or Die',
            'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
            'timestamp': 1450429239,
            'upload_date': '20151218',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the post whose slug matches the URL.

        The page is downloaded, the object assigned to ``initialStoreTree``
        is located with a regex and parsed, and the entries under
        ``resources.posts`` are scanned for one whose ``slug`` equals the id
        matched from ``url``.

        Raises ExtractorError when no entry has a matching slug.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Locate the ``initialStoreTree = {...}`` assignment in the page, then
        # hand the matched text to the JSON parser.
        store_tree_source = self._search_regex(
            r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
            'initialStoreTree')
        store_tree = self._parse_json(store_tree_source, display_id)

        all_posts = store_tree['resources']['posts']
        for candidate in all_posts.values():
            if candidate['slug'] != display_id:
                continue

            # Mandatory fields use direct indexing, so a missing key raises
            # instead of silently producing an incomplete result.
            # NOTE(review): the "jwplatform:" prefix presumably routes the
            # video id to a JW Platform extractor - confirm.
            delegated_url = 'jwplatform:%s' % candidate['fov_videoid']
            post_id = compat_str(candidate['ID'])
            post_title = candidate['title']

            return {
                '_type': 'url_transparent',
                'url': delegated_url,
                'id': post_id,
                'display_id': display_id,
                'title': post_title,
                # Optional fields: looked up with .get(), so an absent key
                # yields None rather than raising KeyError.
                'description': candidate.get('excerpt'),
                'timestamp': parse_iso8601(candidate.get('date')),
                'comment_count': int_or_none(candidate.get('comments_number')),
            }

        raise ExtractorError('Unable to find the current post')
Commit	Line	Data
2be689b7	1	from .common import InfoExtractor
96db61ff	2	from ..compat import compat_str
2be689b7	3	from ..utils import (
2be689b7	4	ExtractorError,
e897bd82 SS	5	int_or_none,
e897bd82 SS	6	parse_iso8601,
2be689b7	7	)
2be689b7	8
96db61ff	9
2be689b7	10	class TheInterceptIE(InfoExtractor):
92519402	11	_VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
2be689b7	12	_TESTS = [{
2be689b7	13	'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
96db61ff	14	'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
2be689b7	15	'info_dict': {
96db61ff	16	'id': '46214',
2be689b7	17	'ext': 'mp4',
2be689b7	18	'title': '#ThisIsACoup – Episode Four: Surrender or Die',
2be689b7	19	'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
96db61ff	20	'timestamp': 1450429239,
	21	'upload_date': '20151218',
	22	'comment_count': int,
2be689b7	23	}
	24	}]
	25
	26	def _real_extract(self, url):
	27	display_id = self._match_id(url)
	28	webpage = self._download_webpage(url, display_id)
	29
96db61ff	30	json_data = self._parse_json(self._search_regex(
	31	r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
	32	'initialStoreTree'), display_id)
2be689b7	33
2be689b7	34	for post in json_data['resources']['posts'].values():
2be689b7	35	if post['slug'] == display_id:
96db61ff	36	return {
	37	'_type': 'url_transparent',
	38	'url': 'jwplatform:%s' % post['fov_videoid'],
	39	'id': compat_str(post['ID']),
	40	'display_id': display_id,
	41	'title': post['title'],
	42	'description': post.get('excerpt'),
	43	'timestamp': parse_iso8601(post.get('date')),
	44	'comment_count': int_or_none(post.get('comments_number')),
	45	}
	46	raise ExtractorError('Unable to find the current post')