]>
Commit | Line | Data |
---|---|---|
426ff042 PH |
1 | import re |
2 | import os | |
3 | import socket | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
8 | compat_str, | |
9 | compat_urllib_error, | |
10 | compat_urllib_parse, | |
11 | compat_urllib_request, | |
12 | ||
13 | ExtractorError, | |
14 | ) | |
15 | ||
16 | ||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com."""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Extract the direct download URL and title for a depositfiles file.

        Returns a single-entry list of legacy info dicts. Raises
        ExtractorError when the page cannot be fetched or when the site
        reports a download restriction instead of a download form.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale so the page markup is predictable
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed.
        free_download_indication = {'gateway_result': '1'}
        # urlencode() returns text; Request POST data must be bytes on
        # Python 3 (ASCII-safe here: the payload is a fixed key/value pair).
        data = compat_urllib_parse.urlencode(free_download_indication).encode('ascii')
        request = compat_urllib_request.Request(url, data)
        try:
            self.report_download_webpage(file_id)
            # urlopen().read() yields bytes on Python 3; decode once so all
            # subsequent regex searches operate on text.
            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8', 'replace')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # No download form — try to surface the site's restriction
            # message (e.g. free-download limits) as the error reason.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if mobj is not None:
                # Collapse the multi-line notice into a single line.
                restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            else:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        return [{
            'id': file_id,
            'url': file_url,
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension,
        }]