[yt-dlp.git] / yt_dlp / extractor / traileraddict.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class TrailerAddictIE(InfoExtractor):
    """Information extractor for trailer and clip pages on traileraddict.com."""

    # This extractor is declared as not working; the flag is consumed by the
    # InfoExtractor base class.
    _WORKING = False
    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        'md5': '41365557f3c8c397d091da510e73ceb4',
        'info_dict': {
            'id': '76184',
            'ext': 'mp4',
            'title': 'Prince Avalanche Trailer',
            'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
        }
    }

    def _real_extract(self, url):
        """Extract the metadata and media URL for a single trailer/clip page.

        Downloads the page at ``url``, reads the numeric video id from the
        embedded player markup, then downloads the ``fvar``/``fvarhd`` info
        page for that id to obtain the media URL and thumbnail.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``thumbnail``, ``description`` and ``view_count``. The last three
        are best-effort and are ``None`` when they cannot be found.
        """
        mobj = self._match_valid_url(url)
        name = mobj.group('movie') + '/' + mobj.group('trailer_name')
        webpage = self._download_webpage(url, name)

        # The title is mandatory, so fail with a proper extractor error
        # instead of calling .replace() on None when <title> is missing.
        title = self._html_extract_title(
            webpage, 'video title', fatal=True).replace(' - Trailer Addict', '')

        view_count_str = self._search_regex(
            r'<span class="views_n">([0-9,.]+)</span>',
            webpage, 'view count', fatal=False)
        # The pattern admits both ',' and '.' as digit-group separators, so
        # strip both before converting; otherwise int() raises ValueError on
        # values such as "1.234" and aborts extraction over an optional field.
        view_count_digits = re.sub(r'[,.]', '', view_count_str or '')
        view_count = int(view_count_digits) if view_count_digits else None

        video_id = self._search_regex(
            r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
            webpage, 'video id')

        # Presence of (no)watchplus function indicates HD quality is available.
        # The opening parenthesis is escaped so that it is matched literally
        # rather than being parsed as an empty regex group.
        if re.search(r'function (?:no)?watchplus\(', webpage):
            fvar = 'fvarhd'
        else:
            fvar = 'fvar'

        # video_id is already a string (regex capture), no conversion needed.
        info_url = 'http://www.traileraddict.com/%s.php?tid=%s' % (fvar, video_id)
        info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')

        # The info page is a '&key=value' list; only the encoded '?' of the
        # media URL is decoded here.
        final_url = self._search_regex(
            r'&fileurl=(.+)',
            info_webpage, 'Download url').replace('%3F', '?')
        # The thumbnail is optional metadata: do not abort when it is absent.
        thumbnail_url = self._search_regex(
            r'&image=(.+?)&',
            info_webpage, 'thumbnail url', fatal=False)

        description = self._html_search_regex(
            r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
            webpage, 'description', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
            'view_count': view_count,
        }
Commit	Line	Data
56b6faf9 PH	1	from __future__ import unicode_literals
56b6faf9 PH	2
887a2279 YK	3	import re
	4
	5	from .common import InfoExtractor
	6
	7
	8	class TrailerAddictIE(InfoExtractor):
58d915df	9	_WORKING = False
5886b38d	10	_VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
887a2279	11	_TEST = {
56b6faf9 PH	12	'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
	13	'md5': '41365557f3c8c397d091da510e73ceb4',
	14	'info_dict': {
	15	'id': '76184',
	16	'ext': 'mp4',
	17	'title': 'Prince Avalanche Trailer',
	18	'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
887a2279 YK	19	}
	20	}
	21
	22	def _real_extract(self, url):
5ad28e7f	23	mobj = self._match_valid_url(url)
16484d49 JMF	24	name = mobj.group('movie') + '/' + mobj.group('trailer_name')
16484d49 JMF	25	webpage = self._download_webpage(url, name)
b1ca5e3f	26
04f3fd2c	27	title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '')
56b6faf9 PH	28	view_count_str = self._search_regex(
	29	r'<span class="views_n">([0-9,.]+)</span>',
	30	webpage, 'view count', fatal=False)
	31	view_count = (
	32	None if view_count_str is None
	33	else int(view_count_str.replace(',', '')))
	34	video_id = self._search_regex(
	35	r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
	36	webpage, 'video id')
46720279	37
b1ca5e3f AVH	38	# Presence of (no)watchplus function indicates HD quality is available
b1ca5e3f AVH	39	if re.search(r'function (no)?watchplus()', webpage):
611c1dd9	40	fvar = 'fvarhd'
b1ca5e3f	41	else:
611c1dd9	42	fvar = 'fvar'
b1ca5e3f	43
611c1dd9 S	44	info_url = 'http://www.traileraddict.com/%s.php?tid=%s' % (fvar, str(video_id))
611c1dd9 S	45	info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')
b1ca5e3f	46
887a2279	47	final_url = self._search_regex(r'&fileurl=(.+)',
9e1a5b84	48	info_webpage, 'Download url').replace('%3F', '?')
887a2279	49	thumbnail_url = self._search_regex(r'&image=(.+?)&',
9e1a5b84	50	info_webpage, 'thumbnail url')
56b6faf9 PH	51
	52	description = self._html_search_regex(
	53	r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
	54	webpage, 'description', fatal=False)
	55
	56	return {
	57	'id': video_id,
	58	'url': final_url,
	59	'title': title,
	60	'thumbnail': thumbnail_url,
	61	'description': description,
	62	'view_count': view_count,
	63	}