[yt-dlp.git] / youtube_dl / extractor / tubitv.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    sanitized_Request,
    urlencode_postdata,
)


class TubiTvIE(InfoExtractor):
    """Extractor for single videos at ``tubitv.com/video/<numeric id>``.

    When credentials are available through ``_get_login_info()`` a login is
    attempted during initialization; without credentials extraction proceeds
    anonymously.
    """

    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
    # NOTE(review): the login form (including the password) is POSTed to a
    # plain-http URL; confirm whether the site accepts https here.
    _LOGIN_URL = 'http://tubitv.com/login'
    _NETRC_MACHINE = 'tubitv'
    _TEST = {
        'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
        'md5': '43ac06be9326f41912dc64ccf7a80320',
        'info_dict': {
            'id': '283829',
            'ext': 'mp4',
            'title': 'The Comedian at The Friday',
            'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
            'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
        },
    }

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns silently when no username is configured.  Raises
        ExtractorError (flagged as expected) when the page returned by the
        login request does not contain the logout element.
        """
        (username, password) = self._get_login_info()
        if username is None:
            # No credentials supplied: continue without logging in.
            return
        self.report_login()
        form_data = {
            'username': username,
            'password': password,
        }
        payload = urlencode_postdata(form_data)
        request = sanitized_Request(self._LOGIN_URL, payload)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        # The trailing positional arguments are presumably (video_id, note,
        # errnote) -- confirm against InfoExtractor._download_webpage.
        login_page = self._download_webpage(
            request, None, False, 'Wrong login info')
        # The presence of the logout element is used as the success marker.
        if not re.search(r'id="tubi-logout"', login_page):
            raise ExtractorError(
                'Login failed (invalid username/password)', expected=True)

    def _real_initialize(self):
        """Initialization hook: perform the optional login."""
        self._login()

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the JSON content description for the video id found in
        *url*, extracts the HLS formats from its ``url`` field and collects
        the optional thumbnails, subtitles and descriptive fields.

        ``title`` and ``url`` are required (a missing key raises KeyError);
        every other field is optional and tolerated when absent or null.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
        title = video_data['title']

        formats = self._extract_m3u8_formats(
            self._proto_relative_url(video_data['url']),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        # ``or []`` (rather than a .get() default) also covers an explicit
        # JSON null, which would otherwise make the iteration raise TypeError.
        thumbnails = [
            {'url': self._proto_relative_url(thumbnail_url)}
            for thumbnail_url in video_data.get('thumbnails') or []
            if thumbnail_url
        ]

        subtitles = {}
        for sub in video_data.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                # An entry without a URL cannot be downloaded; skip it.
                continue
            # Fall back to 'English' for a missing, null or empty language so
            # the subtitles dict never ends up with a None/'' key.
            subtitles.setdefault(sub.get('lang') or 'English', []).append({
                'url': self._proto_relative_url(sub_url),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'uploader_id': video_data.get('publisher_id'),
        }
Commit	Line	Data
5196b988 NJ	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
5196b988 NJ	4	import re
	5
	6	from .common import InfoExtractor
5196b988 NJ	7	from ..utils import (
	8	ExtractorError,
	9	int_or_none,
5c2266df	10	sanitized_Request,
6e6bc8da	11	urlencode_postdata,
5196b988 NJ	12	)
	13
	14
	15	class TubiTvIE(InfoExtractor):
9260cf1d	16	_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
5196b988 NJ	17	_LOGIN_URL = 'http://tubitv.com/login'
	18	_NETRC_MACHINE = 'tubitv'
	19	_TEST = {
9260cf1d	20	'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
f4dfa9a5	21	'md5': '43ac06be9326f41912dc64ccf7a80320',
5196b988	22	'info_dict': {
9260cf1d	23	'id': '283829',
5196b988	24	'ext': 'mp4',
9260cf1d	25	'title': 'The Comedian at The Friday',
9260cf1d	26	'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
f4dfa9a5	27	'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
5196b988 NJ	28	},
	29	}
	30
	31	def _login(self):
	32	(username, password) = self._get_login_info()
	33	if username is None:
	34	return
	35	self.report_login()
	36	form_data = {
	37	'username': username,
	38	'password': password,
	39	}
6e6bc8da	40	payload = urlencode_postdata(form_data)
5c2266df	41	request = sanitized_Request(self._LOGIN_URL, payload)
5196b988 NJ	42	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
	43	login_page = self._download_webpage(
	44	request, None, False, 'Wrong login info')
	45	if not re.search(r'id="tubi-logout"', login_page):
	46	raise ExtractorError(
	47	'Login failed (invalid username/password)', expected=True)
	48
	49	def _real_initialize(self):
	50	self._login()
	51
	52	def _real_extract(self, url):
	53	video_id = self._match_id(url)
9260cf1d	54	video_data = self._download_json(
9260cf1d	55	'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
f4dfa9a5	56	title = video_data['title']
5196b988	57
9260cf1d	58	formats = self._extract_m3u8_formats(
f4dfa9a5 RA	59	self._proto_relative_url(video_data['url']),
f4dfa9a5 RA	60	video_id, 'mp4', 'm3u8_native')
19dbaeec	61	self._sort_formats(formats)
5196b988	62
f4dfa9a5 RA	63	thumbnails = []
	64	for thumbnail_url in video_data.get('thumbnails', []):
	65	if not thumbnail_url:
	66	continue
	67	thumbnails.append({
	68	'url': self._proto_relative_url(thumbnail_url),
	69	})
	70
9260cf1d	71	subtitles = {}
f4dfa9a5 RA	72	for sub in video_data.get('subtitles', []):
f4dfa9a5 RA	73	sub_url = sub.get('url')
9260cf1d	74	if not sub_url:
9260cf1d	75	continue
f4dfa9a5 RA	76	subtitles.setdefault(sub.get('lang', 'English'), []).append({
f4dfa9a5 RA	77	'url': self._proto_relative_url(sub_url),
9260cf1d	78	})
9260cf1d	79
5196b988 NJ	80	return {
	81	'id': video_id,
	82	'title': title,
	83	'formats': formats,
9260cf1d	84	'subtitles': subtitles,
f4dfa9a5 RA	85	'thumbnails': thumbnails,
	86	'description': video_data.get('description'),
	87	'duration': int_or_none(video_data.get('duration')),
	88	'uploader_id': video_data.get('publisher_id'),
5196b988	89	}