[yt-dlp.git] / youtube_dl / extractor / motherless.py

from __future__ import unicode_literals

import datetime
import re

from .common import InfoExtractor
from ..utils import (
    str_to_int,
    unified_strdate,
)


class MotherlessIE(InfoExtractor):
    """Extractor for single-video pages on motherless.com.

    All information is scraped from the HTML of the video page with regular
    expressions; no separate API request is made.
    """

    # Accepts plain video URLs (/<ID>) as well as group-scoped ones
    # (/g/<group>/<ID>), over either http or https.  The upper-case
    # alphanumeric token at the end is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
    _TESTS = [
        {
            'url': 'http://motherless.com/AC3FFE1',
            'md5': '310f62e325a9fafe64f68c0bccb6e75f',
            'info_dict': {
                'id': 'AC3FFE1',
                'ext': 'mp4',
                'title': 'Fucked in the ass while playing PS3',
                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
                'upload_date': '20100913',
                'uploader_id': 'famouslyfuckedup',
                # Raw string: '\.' in a normal literal is an invalid escape.
                'thumbnail': r're:http://.*\.jpg',
                'age_limit': 18,
            }
        },
        {
            'url': 'http://motherless.com/532291B',
            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
            'info_dict': {
                'id': '532291B',
                'ext': 'mp4',
                'title': 'Amazing girl playing the omegle game, PERFECT!',
                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
                'upload_date': '20140622',
                'uploader_id': 'Sulivana7x',
                'thumbnail': r're:http://.*\.jpg',
                'age_limit': 18,
            }
        },
        {
            # Group-scoped URL form.
            'url': 'http://motherless.com/g/cosplay/633979F',
            'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
            'info_dict': {
                'id': '633979F',
                'ext': 'mp4',
                'title': 'Turtlette',
                'categories': ['superheroine heroine  superher'],
                'upload_date': '20140827',
                'uploader_id': 'shade0230',
                'thumbnail': r're:http://.*\.jpg',
                'age_limit': 18,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the video page for ``url`` and build its info dict.

        The title and the media URL are looked up with the search helper's
        default failure handling.  Every other field is optional metadata:
        when it cannot be found it is reported as ``None`` instead of
        aborting the extraction.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``upload_date`` (``YYYYMMDD`` or ``None``), ``uploader_id``,
        ``thumbnail``, ``categories``, ``view_count``, ``like_count``,
        ``comment_count`` and ``age_limit``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
        video_url = self._html_search_regex(
            r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
        age_limit = self._rta_search(webpage)
        view_count = str_to_int(self._html_search_regex(
            r'<strong>Views</strong>\s+([^<]+)<',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'<strong>Favorited</strong>\s+([^<]+)<',
            webpage, 'like count', fatal=False))

        # The upload date is optional metadata, so a missing or unparsable
        # value must not abort the whole extraction.
        upload_date = self._html_search_regex(
            r'<strong>Uploaded</strong>\s+([^<]+)<',
            webpage, 'upload date', fatal=False)
        if upload_date and 'Ago' in upload_date:
            # Relative form ("<N> ... Ago"): subtract N from today's date.
            # NOTE(review): N is interpreted as a number of days, exactly as
            # before; confirm the site never reports other units here.
            days_mobj = re.search(r'([0-9]+)', upload_date)
            if days_mobj:
                delta = datetime.timedelta(days=int(days_mobj.group(1)))
                upload_date = (datetime.datetime.now() - delta).strftime('%Y%m%d')
            else:
                # "Ago" without any number: nothing sensible to compute.
                upload_date = None
        elif upload_date:
            upload_date = unified_strdate(upload_date)

        # Each rendered comment carries this class attribute once, so the
        # number of occurrences is used as the comment count.
        comment_count = webpage.count('class="media-comment-contents"')
        uploader_id = self._html_search_regex(
            r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
            webpage, 'uploader_id', fatal=False)

        # The keywords meta tag holds a comma-separated list.
        categories = self._html_search_meta('keywords', webpage)
        if categories:
            categories = [cat.strip() for cat in categories.split(',')]

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'categories': categories,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }
Commit	Line	Data
a69969ee TJ	1	from __future__ import unicode_literals
	2
	3	import datetime
	4	import re
	5
	6	from .common import InfoExtractor
78ff59d0	7	from ..utils import (
8efd06aa	8	str_to_int,
78ff59d0 PP	9	unified_strdate,
78ff59d0 PP	10	)
a69969ee TJ	11
	12
	13	class MotherlessIE(InfoExtractor):
7f9ced64	14	_VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
a69969ee TJ	15	_TESTS = [
	16	{
	17	'url': 'http://motherless.com/AC3FFE1',
5823eda1	18	'md5': '310f62e325a9fafe64f68c0bccb6e75f',
a69969ee TJ	19	'info_dict': {
a69969ee TJ	20	'id': 'AC3FFE1',
5823eda1	21	'ext': 'mp4',
a69969ee TJ	22	'title': 'Fucked in the ass while playing PS3',
	23	'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
	24	'upload_date': '20100913',
	25	'uploader_id': 'famouslyfuckedup',
78ff59d0	26	'thumbnail': 're:http://.*\.jpg',
a69969ee TJ	27	'age_limit': 18,
	28	}
	29	},
	30	{
	31	'url': 'http://motherless.com/532291B',
	32	'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
	33	'info_dict': {
	34	'id': '532291B',
	35	'ext': 'mp4',
	36	'title': 'Amazing girl playing the omegle game, PERFECT!',
	37	'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
	38	'upload_date': '20140622',
	39	'uploader_id': 'Sulivana7x',
78ff59d0	40	'thumbnail': 're:http://.*\.jpg',
a69969ee TJ	41	'age_limit': 18,
a69969ee TJ	42	}
7608815c F	43	},
	44	{
	45	'url': 'http://motherless.com/g/cosplay/633979F',
	46	'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
	47	'info_dict': {
	48	'id': '633979F',
	49	'ext': 'mp4',
	50	'title': 'Turtlette',
	51	'categories': ['superheroine heroine superher'],
	52	'upload_date': '20140827',
	53	'uploader_id': 'shade0230',
	54	'thumbnail': 're:http://.*\.jpg',
	55	'age_limit': 18,
	56	}
a69969ee TJ	57	}
	58	]
	59
8efd06aa PH	60	def _real_extract(self, url):
8efd06aa PH	61	video_id = self._match_id(url)
a69969ee TJ	62	webpage = self._download_webpage(url, video_id)
a69969ee TJ	63
8efd06aa PH	64	title = self._html_search_regex(
	65	r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
	66	video_url = self._html_search_regex(
	67	r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
78ff59d0	68	age_limit = self._rta_search(webpage)
8efd06aa PH	69	view_count = str_to_int(self._html_search_regex(
	70	r'<strong>Views</strong>\s+([^<]+)<',
	71	webpage, 'view count', fatal=False))
	72	like_count = str_to_int(self._html_search_regex(
	73	r'<strong>Favorited</strong>\s+([^<]+)<',
	74	webpage, 'like count', fatal=False))
5f6a1245	75
8efd06aa PH	76	upload_date = self._html_search_regex(
8efd06aa PH	77	r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
78ff59d0 PP	78	if 'Ago' in upload_date:
	79	days = int(re.search(r'([0-9]+)', upload_date).group(1))
	80	upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
	81	else:
	82	upload_date = unified_strdate(upload_date)
	83
a69969ee	84	comment_count = webpage.count('class="media-comment-contents"')
8efd06aa PH	85	uploader_id = self._html_search_regex(
	86	r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
	87	webpage, 'uploader_id')
a69969ee TJ	88
a69969ee TJ	89	categories = self._html_search_meta('keywords', webpage)
78ff59d0	90	if categories:
a69969ee TJ	91	categories = [cat.strip() for cat in categories.split(',')]
a69969ee TJ	92
a69969ee TJ	93	return {
	94	'id': video_id,
	95	'title': title,
	96	'upload_date': upload_date,
	97	'uploader_id': uploader_id,
78ff59d0	98	'thumbnail': self._og_search_thumbnail(webpage),
a69969ee	99	'categories': categories,
8efd06aa PH	100	'view_count': view_count,
8efd06aa PH	101	'like_count': like_count,
a69969ee TJ	102	'comment_count': comment_count,
	103	'age_limit': age_limit,
	104	'url': video_url,
	105	}