import re
from .common import InfoExtractor
-
from ..utils import traverse_obj
class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})']
_TESTS = [{
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
'only_matching': True,
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'upload_date': '20141230',
'timestamp': 1419972442,
'title': 'Why black boxes don’t transmit data in real time',
- }
+ },
}],
'skip': 'Doesnt have a video anymore',
}, {
@classmethod
def suitable(cls, url):
- return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
+ return False if WashingtonPostIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
page_id = self._match_id(url)
if content_element.get('type') == 'video':
uuids.append(content_element.get('_id'))
- entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
+ entries = [self.url_result(f'washingtonpost:{uuid}', 'WashingtonPost', uuid) for uuid in uuids]
return {
'_type': 'playlist',