diff options
| author | 2016-03-29 12:56:58 +0100 | |
|---|---|---|
| committer | 2016-03-29 12:56:58 +0100 | |
| commit | f943d63098a50746f4e470e403a991a4d9713030 (patch) | |
| tree | deeb98058917d0155227211d72576f0cbab28d3f /Utils.py | |
| parent | Initial commit (diff) | |
first commit.
Diffstat (limited to 'Utils.py')
| -rw-r--r-- | Utils.py | 158 |
1 files changed, 158 insertions, 0 deletions
# Reconstructed from: diff --git a/Utils.py b/Utils.py
# (new file mode 100644, index 00000000..00c2699b, @@ -0,0 +1,158 @@)
import json
import re
import traceback
import urllib.parse
import urllib.request

try:
    import bs4
except ImportError:
    # bs4 is only needed by get_url's HTML handling; keep the pure string
    # helpers below importable when it is not installed.
    bs4 = None

USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
REGEX_HTTP = re.compile("https?://", re.I)


def remove_colon(s):
    """Return `s` without a single leading ":" (if it has one)."""
    if s.startswith(":"):
        s = s[1:]
    return s


def arbitrary(s, n):
    """Join the items of sequence `s` from index `n` onwards with spaces and
    strip one leading ":" from the result."""
    return remove_colon(" ".join(s[n:]))


def seperate_hostmask(hostmask):
    """Split a "nick!user@host" hostmask into (nickname, username, hostname).

    A single leading ":" is stripped first. If the string does not contain
    "!" followed later by "@", all three returned values are the whole
    (colon-stripped) string.
    """
    hostmask = remove_colon(hostmask)
    first_delim = hostmask.find("!")
    second_delim = hostmask.find("@")
    nickname = username = hostname = hostmask
    if first_delim > -1 and second_delim > first_delim:
        nickname, remainder = hostmask.split("!", 1)
        # Split what follows the "!" (not the whole hostmask) so that the
        # username does not still carry the "nick!" prefix.
        username, hostname = remainder.split("@", 1)
    return nickname, username, hostname


def _sniff_encoding(content):
    """Guess the charset of HTML bytes from <meta> tags, then the doctype.

    Returns None when nothing could be determined or bs4 is unavailable.
    """
    if bs4 is None:
        return None
    soup = bs4.BeautifulSoup(content, "lxml")
    encoding = None
    for meta in soup.find_all("meta"):
        if "charset=" in meta.get("content", ""):
            encoding = meta.get("content").split("charset=", 1)[1
                ].split(";", 1)[0]
            break
        elif meta.get("charset", ""):
            encoding = meta.get("charset")
            break
    if not encoding:
        for item in soup.contents:
            if isinstance(item, bs4.Doctype):
                # A bare "html" doctype is taken as utf8; any other doctype
                # falls back to latin-1.
                encoding = "utf8" if item == "html" else "latin-1"
                break
    return encoding


def get_url(url, **kwargs):
    """Fetch `url` and return its body.

    "http://" is prepended when `url` has no scheme.

    Keyword arguments:
      method      -- HTTP method (default "GET").
      get_params  -- mapping/sequence urlencoded into a "?..." suffix.
      post_params -- mapping/sequence urlencoded into the request body.
      headers     -- dict of extra request headers.
      soup        -- if truthy, return a bs4.BeautifulSoup of the raw body.
      json        -- if truthy, return the body parsed as JSON.
      code        -- if truthy, return a (status_code, data) tuple.

    Returns the decoded body (str), parsed JSON, or a BeautifulSoup object
    as selected above. Returns False (after printing the traceback) when
    the request fails or when JSON was requested and parsing fails.
    Raises ImportError if `soup` is requested and bs4 is not installed.
    """
    scheme = urllib.parse.urlparse(url).scheme
    if not scheme:
        url = "http://%s" % url

    method = kwargs.get("method", "GET")
    get_params = kwargs.get("get_params", "")
    post_params = kwargs.get("post_params", None)
    headers = kwargs.get("headers", {})
    if get_params:
        get_params = "?%s" % urllib.parse.urlencode(get_params)
    if post_params:
        post_params = urllib.parse.urlencode(post_params).encode("utf8")
    else:
        # Empty containers must not be handed to Request as a body.
        post_params = None
    url = "%s%s" % (url, get_params)

    request = urllib.request.Request(url, post_params)
    request.add_header("Accept-Language", "en-US")
    request.add_header("User-Agent", USER_AGENT)
    for header, value in headers.items():
        request.add_header(header, value)
    request.method = method

    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request) as response:
            response_content = response.read()
            encoding = response.info().get_content_charset()
            response_code = response.code
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        traceback.print_exc()
        return False

    if kwargs.get("soup"):
        if bs4 is None:
            raise ImportError("bs4 is required for get_url(soup=True)")
        return bs4.BeautifulSoup(response_content, "lxml")
    if not encoding:
        encoding = _sniff_encoding(response_content)
    try:
        response_content = response_content.decode(encoding or "utf8")
    except (LookupError, UnicodeDecodeError):
        # The charset may come from page markup and can be unknown or
        # wrong; degrade to a lossy utf8 decode instead of crashing.
        response_content = response_content.decode("utf8", "replace")

    data = response_content
    if kwargs.get("json"):
        try:
            data = json.loads(response_content)
        except json.decoder.JSONDecodeError:
            traceback.print_exc()
            return False
    if kwargs.get("code"):
        return response_code, data
    else:
        return data


COLOR_WHITE, COLOR_BLACK, COLOR_BLUE, COLOR_GREEN = 0, 1, 2, 3
COLOR_RED, COLOR_BROWN, COLOR_PURPLE, COLOR_ORANGE = 4, 5, 6, 7
COLOR_YELLOW, COLOR_LIGHTGREEN, COLOR_CYAN, COLOR_LIGHTCYAN = (8, 9,
    10, 11)
COLOR_LIGHTBLUE, COLOR_PINK, COLOR_GREY, COLOR_LIGHTGREY = (12, 13,
    14, 15)
FONT_BOLD, FONT_ITALIC, FONT_UNDERLINE, FONT_INVERT = ("\x02", "\x1D",
    "\x1F", "\x16")
FONT_COLOR, FONT_RESET = "\x03", "\x0F"


def color(foreground, background=None):
    """Build a colour control sequence: FONT_COLOR, the zero-padded
    two-digit foreground and, if given, "," plus the two-digit background.

    `background` is compared against None/"" rather than tested for
    truthiness so that COLOR_WHITE (0) is usable as a background.
    """
    foreground = str(foreground).zfill(2)
    if background is None or background == "":
        return "%s%s" % (FONT_COLOR, foreground)
    background = str(background).zfill(2)
    return "%s%s,%s" % (FONT_COLOR, foreground, background)


TIME_SECOND = 1
TIME_MINUTE = TIME_SECOND*60
TIME_HOUR = TIME_MINUTE*60
TIME_DAY = TIME_HOUR*24
TIME_WEEK = TIME_DAY*7

# Largest unit first, so the first one that fits is selected.
_TIME_UNITS = (
    (TIME_WEEK, "week"),
    (TIME_DAY, "day"),
    (TIME_HOUR, "hour"),
    (TIME_MINUTE, "minute"),
)


def time_unit(seconds):
    """Express `seconds` in the largest unit (week/day/hour/minute/second)
    that fits at least once.

    Returns [count, unit]: `count` is truncated to an int and `unit` is
    pluralised unless the count is exactly 1.
    """
    since, unit = seconds, "second"
    for size, name in _TIME_UNITS:
        if seconds >= size:
            since, unit = seconds // size, name
            break
    since = int(since)
    if since != 1:
        unit = "%ss" % unit # pluralise the unit
    return [since, unit]
# Raw string: "\d" in a plain literal is an invalid escape sequence.
REGEX_PRETTYTIME = re.compile(r"\d+[wdhms]", re.I)

SECONDS_MINUTES = 60
SECONDS_HOURS = SECONDS_MINUTES*60
SECONDS_DAYS = SECONDS_HOURS*24
SECONDS_WEEKS = SECONDS_DAYS*7


def from_pretty_time(pretty_time):
    """Convert a duration such as "1w2d3h4m5s" into a number of seconds.

    Every "<digits><unit>" token found in `pretty_time` is summed; units
    are w, d, h, m and s, case-insensitively. Text that does not match a
    token is ignored.

    Returns the total as an int, or None when the total is not positive
    (no tokens found, or all counts were zero).
    """
    multipliers = {
        "s": 1,
        "m": SECONDS_MINUTES,
        "h": SECONDS_HOURS,
        "d": SECONDS_DAYS,
        "w": SECONDS_WEEKS,
    }
    seconds = 0
    for match in REGEX_PRETTYTIME.findall(pretty_time):
        number, unit = int(match[:-1]), match[-1].lower()
        seconds += number * multipliers[unit]
    if seconds > 0:
        return seconds
    return None
