[yt-dlp.git] / youtube_dl / extractor / normalboots.py

import re

from .common import InfoExtractor

from ..utils import (
    ExtractorError,
    unified_strdate,
)

class NormalbootsIE(InfoExtractor):
    """Information extractor for video pages on normalboots.com.

    The video page embeds an iframe; the page behind that iframe names a
    media file, and that name is appended to the
    ``http://player.screenwavemedia.com/`` base URL to form the video URL.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'

    def _real_extract(self, url):
        """Build the info dictionary for a single normalboots.com video URL.

        Args:
            url: Video page address; the ``http://`` scheme may be omitted.

        Returns:
            A dict with the keys ``id``, ``url``, ``title``, ``description``,
            ``thumbnail``, ``uploader`` and ``upload_date``.

        Raises:
            ExtractorError: If ``url`` does not match ``_VALID_URL``.
        """
        # NOTE(review): the inherited download/search helpers presumably
        # raise on failure as well -- confirm against InfoExtractor.
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # _VALID_URL accepts scheme-less URLs, so add the scheme before
        # downloading.
        if not url.startswith('http'):
            url = 'http://' + url

        webpage = self._download_webpage(url, video_id)

        video_uploader = self._html_search_regex(
            r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
            webpage, 'uploader')

        # The span text starts with a word and a comma (presumably the
        # weekday); only the remainder is handed to unified_strdate.
        raw_upload_date = self._html_search_regex(
            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
            webpage, 'date')

        # The media file name is only present on the page loaded by the
        # embedded player iframe, so that page is fetched separately.
        player_url = self._html_search_regex(
            r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
            webpage, 'url')
        player_page = self._download_webpage(player_url, video_id)
        video_file = self._html_search_regex(
            r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')

        return {
            'id': video_id,
            'url': u'http://player.screenwavemedia.com/' + video_file,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': video_uploader,
            'upload_date': unified_strdate(raw_upload_date),
        }
Commit	Line	Data
c81a855b OP	1	import re
	2
	3	from .common import InfoExtractor
	4
	5	from ..utils import (
	6	ExtractorError,
	7	unified_strdate,
	8	)
	9
	10	class NormalbootsIE(InfoExtractor):
	11	_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
	12
	13	def _real_extract(self, url):
	14	mobj = re.match(self._VALID_URL, url)
	15	if mobj is None:
	16	raise ExtractorError(u'Invalid URL: %s' % url)
	17	video_id = mobj.group('videoid')
	18
	19	info = {
	20	'id': video_id,
	21	'uploader': None,
	22	'upload_date': None,
	23	}
	24
	25	if url[:4] != 'http':
	26	url = 'http://' + url
	27
	28	webpage = self._download_webpage(url, video_id)
	29	video_title = self._og_search_title(webpage)
	30	video_description = self._og_search_description(webpage)
	31	video_thumbnail = self._og_search_thumbnail(webpage)
	32	video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
	33	webpage, 'uploader')
	34	raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
	35	webpage, 'date')
	36	video_upload_date = unified_strdate(raw_upload_date)
	37	video_upload_date = unified_strdate(raw_upload_date)
	38
	39	player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
	40	player_page = self._download_webpage(player_url, video_id)
	41	video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
	42
	43	info['url'] = video_url
	44	info['title'] = video_title
	45	info['description'] = video_description
	46	info['thumbnail'] = video_thumbnail
	47	info['uploader'] = video_uploader
	48	info['upload_date'] = video_upload_date
	49
	50	return info