From: John Runyon Date: Fri, 25 Aug 2023 21:58:32 +0000 (-0600) Subject: urls - remove broken APIs X-Git-Url: https://jfr.im/git/erebus.git/commitdiff_plain/9df62f9028009c5b820f496ca4c04443d415a917 urls - remove broken APIs --- diff --git a/modules/urls.py b/modules/urls.py index 0171fe4..ae80a4e 100644 --- a/modules/urls.py +++ b/modules/urls.py @@ -211,10 +211,10 @@ def goturl(url): for regex in group: if regex.match(url): return None - request = urllib2.Request(url) + request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'}) opener = urllib2.build_opener(SmartRedirectHandler()) try: - soup = BeautifulSoup(opener.open(request, timeout=0.5)) + soup = BeautifulSoup(opener.open(request, timeout=2)) if soup.title: return unescape('%s' % (soup.title.string)) else: @@ -223,26 +223,15 @@ def goturl(url): return 'Error: %s %s' % (e.code, e.reason) except urllib2.URLError as e: return 'Error: %s' % (e.reason) + except TimeoutError as e: + return 'Error: request timed out' except Exception as e: - return 'Error: %r' % (e.args) + return 'Error: %s %r' % (type(e).__name__, e.args) url_regex = ( re.compile(r'https?://[^/\s]+\.[^/\s]+(?:/\S+)?'), ) -spotify_regex = ( - re.compile(r'spotify:(?P\w+):(?P\w{22})'), - re.compile(r'https?://open\.spotify\.com/(?P\w+)/(?P\w+)') -) -youtube_regex = ( - re.compile(r'https?://(?:www\.)?youtube\.com/watch\?[a-zA-Z0-9=&_\-]+'), -) -twitch_regex = ( - re.compile(r'https?:\/\/(?:www\.)?twitch.tv\/([A-Za-z0-9]*)'), -) other_regexes = ( - (gotspotify, spotify_regex, 'Spotify'), - (gotyoutube, youtube_regex, 'YouTube'), - (gottwitch, twitch_regex, 'Twitch'), ) regexes = other_regexes + ( (goturl, url_regex, 'Title'),