# Reconstructed from commit 69d58eede2e9bf83aa1ed1d8fcf956efde494726
# (jesopo, Wed, 3 Oct 2018 13:22:37 +0100):
#   "Move src/Utils.py in to src/utils/, splitting functionality out in to
#    modules of related functionality"
# This section holds the contents of two of the files that commit creates:
# src/utils/__init__.py and src/utils/http.py.

# ---------------------------------------------------------------------------
# src/utils/__init__.py
# ---------------------------------------------------------------------------
from . import irc, http

import io, re
from src import ModuleManager

# Lengths of common time spans, expressed in seconds.
TIME_SECOND = 1
TIME_MINUTE = TIME_SECOND*60
TIME_HOUR = TIME_MINUTE*60
TIME_DAY = TIME_HOUR*24
TIME_WEEK = TIME_DAY*7

def time_unit(seconds):
    """Express a duration using the largest unit that fits into it.

    Returns a two-item list ``[count, unit]``. ``count`` is the duration
    divided by the chosen unit and truncated with ``int()``; ``unit`` is one
    of "week", "day", "hour", "minute" or "second", with an "s" appended
    when ``count`` is greater than one.
    """
    spans = ((TIME_WEEK, "week"), (TIME_DAY, "day"), (TIME_HOUR, "hour"),
        (TIME_MINUTE, "minute"))
    for span, name in spans:
        if seconds >= span:
            since, unit = seconds/span, name
            break
    else:
        # shorter than a minute: report plain seconds
        since, unit = seconds, "second"
    since = int(since)
    if since > 1:
        unit = "%ss" % unit # pluralise the unit
    return [since, unit]

# One "<digits><unit letter>" chunk of a pretty time such as "1w2d3h".
# Raw string so that "\d" is a regex escape, not an (invalid) string escape.
REGEX_PRETTYTIME = re.compile(r"\d+[wdhms]", re.I)

SECONDS_MINUTES = 60
SECONDS_HOURS = SECONDS_MINUTES*60
SECONDS_DAYS = SECONDS_HOURS*24
SECONDS_WEEKS = SECONDS_DAYS*7

# Seconds represented by each unit letter accepted by REGEX_PRETTYTIME.
_PRETTYTIME_MULTIPLIERS = {
    "s": 1,
    "m": SECONDS_MINUTES,
    "h": SECONDS_HOURS,
    "d": SECONDS_DAYS,
    "w": SECONDS_WEEKS,
}

def from_pretty_time(pretty_time):
    """Convert a string such as "1h30m" to a number of seconds.

    Every "<digits><w|d|h|m|s>" chunk found in ``pretty_time`` (unit letters
    are case-insensitive) is converted to seconds and summed; any other text
    is ignored. Returns the total, or None when the total is not greater
    than zero (for example when nothing matched).
    """
    seconds = 0
    for match in REGEX_PRETTYTIME.findall(pretty_time):
        number, unit = int(match[:-1]), match[-1].lower()
        seconds += number*_PRETTYTIME_MULTIPLIERS[unit]
    if seconds > 0:
        return seconds
    return None

# Unit ranks for to_pretty_time(): a larger number means a smaller unit.
UNIT_SECOND = 5
UNIT_MINUTE = 4
UNIT_HOUR = 3
UNIT_DAY = 2
UNIT_WEEK = 1
def to_pretty_time(total_seconds, minimum_unit=UNIT_SECOND, max_units=6):
    """Format a number of seconds as a compact string such as "1w2d3h".

    total_seconds: the duration to format.
    minimum_unit: smallest unit to include (one of the UNIT_* constants);
        smaller units are left out.
    max_units: maximum number of unit components to emit.

    Components whose amount is zero are skipped, so the result is an empty
    string when no component qualifies (e.g. ``total_seconds == 0``).
    """
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    weeks, days = divmod(days, 7)

    components = ((weeks, UNIT_WEEK, "w"), (days, UNIT_DAY, "d"),
        (hours, UNIT_HOUR, "h"), (minutes, UNIT_MINUTE, "m"),
        (seconds, UNIT_SECOND, "s"))
    out = ""
    units = 0
    for amount, unit, suffix in components:
        if amount and minimum_unit >= unit and units < max_units:
            out += "%d%s" % (amount, suffix)
            units += 1
    return out

IS_TRUE = ["true", "yes", "on", "y"]
IS_FALSE = ["false", "no", "off", "n"]
def bool_or_none(s):
    """Return True/False for a word in IS_TRUE/IS_FALSE (case-insensitive),
    or None when ``s`` is neither."""
    s = s.lower()
    if s in IS_TRUE:
        return True
    elif s in IS_FALSE:
        return False
    return None
def int_or_none(s):
    """Return ``s`` as an int when it consists only of digits, else None.

    Leading zeros are ignored. A string made up solely of zeros ("0",
    "000") yields 0; previously stripping the zeros left an empty string
    and such input was wrongly reported as "not a number".
    """
    stripped_s = s.lstrip("0")
    if stripped_s.isdigit():
        return int(stripped_s)
    if s and not stripped_s:
        # non-empty input that was nothing but zeros
        return 0
    return None

def get_closest_setting(event, setting, default=None):
    """Look up ``setting`` on the most specific object named by ``event``.

    The object consulted is ``event["channel"]`` when present, otherwise
    ``event["target"]`` when ``event["is_channel"]`` is truthy, otherwise
    ``event["user"]``. If that object has no value, the result of
    ``event["server"].get_setting(setting, default)`` is used as fallback.
    """
    server = event["server"]
    if "channel" in event:
        closest = event["channel"]
    elif "target" in event and "is_channel" in event and event["is_channel"]:
        closest = event["target"]
    else:
        closest = event["user"]
    return closest.get_setting(setting, server.get_setting(setting, default))

def prevent_highlight(nickname):
    """Return ``nickname`` with U+200C (zero width non-joiner) inserted after
    its first character.

    NOTE(review): going by the function name, this is meant to stop IRC
    clients matching the text as a nickname mention - confirm with callers.
    An empty nickname is returned unchanged rather than raising IndexError.
    """
    if not nickname:
        return nickname
    return nickname[0]+"\u200c"+nickname[1:]

def _set_get_append(obj, setting, item):
    """Append ``item`` to the list stored in attribute ``setting`` of
    ``obj``, creating the list first if the attribute does not exist."""
    if not hasattr(obj, setting):
        setattr(obj, setting, [])
    getattr(obj, setting).append(item)
def hook(event, **kwargs):
    """Decorator factory: record ``{"event": event, "kwargs": kwargs}`` on
    the decorated function, in the list attribute named by
    ModuleManager.BITBOT_HOOKS_MAGIC. The function itself is returned
    unmodified."""
    def _hook_func(func):
        _set_get_append(func, ModuleManager.BITBOT_HOOKS_MAGIC,
            {"event": event, "kwargs": kwargs})
        return func
    return _hook_func
def export(setting, value):
    """Decorator factory: record ``{"setting": setting, "value": value}`` on
    the decorated object, in the list attribute named by
    ModuleManager.BITBOT_EXPORTS_MAGIC. The object itself is returned
    unmodified."""
    def _export_func(module):
        _set_get_append(module, ModuleManager.BITBOT_EXPORTS_MAGIC,
            {"setting": setting, "value": value})
        return module
    return _export_func

def get_hashflags(filename):
    """Read the "#--name value" lines from the leading comment block of a
    file.

    Reading stops at the first line that does not start with "#". For each
    line starting with "#--", the text between "#--" and the first space is
    the flag name and the rest of the line (if any) is its value; a flag
    without a value maps to None. Returns the ``items()`` view of the
    collected name -> value dict.
    """
    hashflags = {}
    with io.open(filename, mode="r", encoding="utf8") as f:
        for line in f:
            line = line.strip("\n")
            if not line.startswith("#"):
                break
            elif line.startswith("#--"):
                line_split = line.split(" ", 1)
                hashflag = line_split[0][3:]
                value = None

                if len(line_split) > 1:
                    value = line_split[1]
                hashflags[hashflag] = value
    return hashflags.items()

class Docstring(object):
    """Result of parse_docstring(): free-text description plus keyed items."""
    def __init__(self, description, items):
        # description: the joined free-text lines
        self.description = description
        # items: dict of ":key: value" entries
        self.items = items

def parse_docstring(s):
    """Split a docstring into a description and its ":key: value" items.

    Lines are stripped and blank lines skipped. A line starting with ":"
    opens a new item: the text between the leading ":" and the first ": "
    is the key, the remainder is the value. Any other line is appended
    (space-separated) to the most recently opened item, or to the
    description when no item has been opened yet.

    Fix: items used to be stored as ``items[value] = value`` (the key was
    discarded, and still carried its leading ":"); they are now stored
    under their key.
    """
    description = ""
    last_item = None
    items = {}
    for line in (s or "").split("\n"):
        line = line.strip()
        if not line:
            continue

        if line[0] == ":":
            key, _, value = line[1:].partition(": ")
            last_item = key
            items[key] = value
        elif last_item is not None:
            # continuation line of the item opened above
            items[last_item] += " %s" % line
        else:
            if description:
                description += " "
            description += line
    return Docstring(description, items)

# ---------------------------------------------------------------------------
# src/utils/http.py
# ---------------------------------------------------------------------------
import re, traceback, urllib.error, urllib.parse, urllib.request
import json, ssl
import bs4

USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
REGEX_HTTP = re.compile("https?://", re.I)

def get_url(url, **kwargs):
    """Fetch ``url`` and return its body as text, JSON or a BeautifulSoup.

    A URL without a scheme gets "http://" prepended. Recognised keyword
    arguments:
        method: HTTP method (default "GET").
        get_params: mapping url-encoded into a "?query" suffix.
        post_params: mapping url-encoded (utf8) into the request body.
        headers: extra request headers.
        code: when truthy, return ``(status_code, data)`` instead of
            ``data``.
        soup: when truthy, return the parsed bs4.BeautifulSoup document
            (never wrapped in a tuple, even with ``code``).
        parser: bs4 parser name (default "lxml").
        json: when truthy, decode a non-empty body with json.loads.

    Failures are reported through the return value: ``data`` is False, and
    with ``code`` the status is 0 for a URL that cannot be encoded as
    latin-1, the HTTP status for an HTTPError, and -1 for other URL or
    certificate errors. Tracebacks of caught errors are printed.

    Changes from the original: the response is now closed after reading,
    and a JSON decode failure honours ``code`` (it used to return a bare
    False, breaking callers that unpack a 2-tuple).
    """
    if not urllib.parse.urlparse(url).scheme:
        url = "http://%s" % url

    want_code = kwargs.get("code")
    method = kwargs.get("method", "GET")
    get_params = kwargs.get("get_params", "")
    post_params = kwargs.get("post_params", None)
    headers = kwargs.get("headers", {})
    parser = kwargs.get("parser", "lxml")

    if get_params:
        get_params = "?%s" % urllib.parse.urlencode(get_params)
    if post_params:
        post_params = urllib.parse.urlencode(post_params).encode("utf8")
    url = "%s%s" % (url, get_params)
    try:
        # refuse URLs containing characters outside latin-1
        url.encode("latin-1")
    except UnicodeEncodeError:
        return (0, False) if want_code else False

    request = urllib.request.Request(url, post_params)
    request.add_header("Accept-Language", "en-US")
    request.add_header("User-Agent", USER_AGENT)
    for header, value in headers.items():
        request.add_header(header, value)
    request.method = method

    try:
        response = urllib.request.urlopen(request, timeout=5)
    except urllib.error.HTTPError as e:
        # must precede URLError: HTTPError is a subclass of it
        traceback.print_exc()
        return (e.code, False) if want_code else False
    except (urllib.error.URLError, ssl.CertificateError):
        traceback.print_exc()
        return (-1, False) if want_code else False

    # Read everything we need, then release the connection.
    with response:
        response_content = response.read()
        encoding = response.info().get_content_charset()
        status = response.code

    if kwargs.get("soup"):
        return bs4.BeautifulSoup(response_content, parser)
    if not encoding:
        # No charset in the HTTP headers: look for one in <meta> tags.
        soup = bs4.BeautifulSoup(response_content, parser)
        for meta in soup.find_all("meta"):
            if "charset=" in meta.get("content", ""):
                encoding = meta.get("content").split("charset=", 1)[1
                    ].split(";", 1)[0]
            elif meta.get("charset", ""):
                encoding = meta.get("charset")
            else:
                continue
            break
        if not encoding:
            # Still nothing: guess from the first doctype in the document.
            for item in soup.contents:
                if isinstance(item, bs4.Doctype):
                    if item == "html":
                        encoding = "utf8"
                    else:
                        encoding = "latin-1"
                    break
    response_content = response_content.decode(encoding or "utf8")
    data = response_content
    if kwargs.get("json") and data:
        try:
            data = json.loads(response_content)
        except json.decoder.JSONDecodeError:
            traceback.print_exc()
            return (status, False) if want_code else False
    if want_code:
        return status, data
    return data

def strip_html(s):
    """Return the text content of HTML string ``s`` (parsed with lxml)."""
    return bs4.BeautifulSoup(s, "lxml").get_text()
diff --git a/src/utils/irc.py b/src/utils/irc.py new file mode 100644 index 00000000..792de7f3 --- /dev/null +++ b/src/utils/irc.py @@ -0,0 +1,116 @@ +import string, re + +ASCII_UPPER = string.ascii_uppercase +ASCII_LOWER = string.ascii_lowercase +STRICT_RFC1459_UPPER = ASCII_UPPER+r'\[]' +STRICT_RFC1459_LOWER = ASCII_LOWER+r'|{}' +RFC1459_UPPER = STRICT_RFC1459_UPPER+"^" +RFC1459_LOWER = STRICT_RFC1459_LOWER+"~" + +def remove_colon(s): + if s.startswith(":"): + s = s[1:] + return s + +# case mapping lowercase/uppcase logic +def _multi_replace(s, chars1, chars2): + for char1, char2 in zip(chars1, chars2): + s = s.replace(char1, char2) + return s +def lower(server, s): + if server.case_mapping == "ascii": + return _multi_replace(s, ASCII_UPPER, ASCII_LOWER) + elif server.case_mapping == "rfc1459": + return _multi_replace(s, RFC1459_UPPER, RFC1459_LOWER) + elif server.case_mapping == "strict-rfc1459": + return _multi_replace(s, STRICT_RFC1459_UPPER, STRICT_RFC1459_LOWER) + else: + raise ValueError("unknown casemapping '%s'" % server.case_mapping) + +# compare a string while respecting case mapping +def equals(server, s1, s2): + return lower(server, s1) == lower(server, s2) + +class IRCHostmask(object): + def __init__(self, nickname, username, hostname, hostmask): + self.nickname = nickname + self.username = username + self.hostname = hostname + self.hostmask = hostmask + def __repr__(self): + return "IRCHostmask(%s)" % self.__str__() + def __str__(self): + return self.hostmask + +def seperate_hostmask(hostmask): + hostmask = remove_colon(hostmask) + nickname, _, username = hostmask.partition("!") + username, _, hostname = username.partition("@") + return IRCHostmask(nickname, username, hostname, hostmask) + + +class IRCLine(object): + def __init__(self, tags, prefix, command, args, arbitrary, last, server): + self.tags = tags + self.prefix = prefix + self.command = command + self.args = args + self.arbitrary = arbitrary + self.last = last + self.server = server + 
# src/utils/irc.py, second part: line parsing and text formatting helpers.

def parse_line(server, line):
    """Parse one raw IRC line into an IRCLine.

    Steps, in order:
      1. A leading "@tag=value;tag2 " section is split off into the ``tags``
         dict (a tag without "=" maps to an empty string).
      2. Everything after the first " :" becomes ``arbitrary``; an empty or
         missing trailing part is stored as None.
      3. A leading ":prefix " is split off and turned into an IRCHostmask
         with seperate_hostmask().
      4. The next word is the command; the rest, split on single spaces, is
         ``args`` (this is ``[""]`` when nothing follows the command).

    ``last`` is ``arbitrary`` when that is set, otherwise the final entry of
    ``args``. ``server`` is stored on the result untouched.
    """
    tags = {}
    prefix = None
    command = None

    if line[0] == "@":
        tags_prefix, line = line[1:].split(" ", 1)
        for tag in filter(None, tags_prefix.split(";")):
            tag, _, value = tag.partition("=")
            tags[tag] = value

    line, _, arbitrary = line.partition(" :")
    arbitrary = arbitrary or None

    if line[0] == ":":
        prefix, line = line[1:].split(" ", 1)
        prefix = seperate_hostmask(prefix)
    command, _, line = line.partition(" ")

    args = line.split(" ")
    last = arbitrary or args[-1]

    return IRCLine(tags, prefix, command, args, arbitrary, last, server)

# Colour numbers, for use with color().
COLOR_WHITE, COLOR_BLACK, COLOR_BLUE, COLOR_GREEN = 0, 1, 2, 3
COLOR_RED, COLOR_BROWN, COLOR_PURPLE, COLOR_ORANGE = 4, 5, 6, 7
COLOR_YELLOW, COLOR_LIGHTGREEN, COLOR_CYAN, COLOR_LIGHTCYAN = (8, 9,
    10, 11)
COLOR_LIGHTBLUE, COLOR_PINK, COLOR_GREY, COLOR_LIGHTGREY = (12, 13,
    14, 15)
# Formatting control characters.
FONT_BOLD, FONT_ITALIC, FONT_UNDERLINE, FONT_INVERT = ("\x02", "\x1D",
    "\x1F", "\x16")
FONT_COLOR, FONT_RESET = "\x03", "\x0F"
# A colour code: FONT_COLOR, a one- or two-digit foreground and an optional
# ",background". Raw string so "\d" is a regex escape; one-digit codes are
# accepted so strip_font() does not leave their digit behind.
REGEX_COLOR = re.compile(r"%s\d{1,2}(?:,\d{1,2})?" % FONT_COLOR)

def color(s, foreground, background=None):
    """Wrap ``s`` in a colour code.

    ``foreground`` and (optionally) ``background`` are colour numbers such
    as the COLOR_* constants; each is zero-padded to two digits. The result
    is FONT_COLOR + "FG[,BG]" + s + FONT_COLOR.

    ``background`` is compared against None rather than tested for
    truthiness, so COLOR_WHITE (0) is a usable background.
    """
    foreground = str(foreground).zfill(2)
    if background is None:
        background = ""
    else:
        background = ",%s" % str(background).zfill(2)
    return "%s%s%s%s%s" % (FONT_COLOR, foreground, background, s, FONT_COLOR)

def bold(s):
    """Return ``s`` surrounded by FONT_BOLD."""
    return "%s%s%s" % (FONT_BOLD, s, FONT_BOLD)

def underline(s):
    """Return ``s`` surrounded by FONT_UNDERLINE."""
    return "%s%s%s" % (FONT_UNDERLINE, s, FONT_UNDERLINE)

def strip_font(s):
    """Return ``s`` with all formatting control codes removed.

    Removes bold, italic, underline, invert and reset characters, then
    colour codes together with their digits, then any bare FONT_COLOR left
    over. (Underline, invert and reset used to be left in place.)
    """
    for control in (FONT_BOLD, FONT_ITALIC, FONT_UNDERLINE, FONT_INVERT,
            FONT_RESET):
        s = s.replace(control, "")
    # colour codes after the toggles, so a toggle sitting between FONT_COLOR
    # and its digits cannot hide the digits from the regex
    s = REGEX_COLOR.sub("", s)
    s = s.replace(FONT_COLOR, "")
    return s