about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- modules/acronym.py 12
-rw-r--r-- modules/eval_lua.py 7
-rw-r--r-- modules/fediverse/ap_utils.py 5
-rw-r--r-- modules/title.py 10
-rw-r--r-- src/utils/http.py 21
5 files changed, 24 insertions, 31 deletions
diff --git a/modules/acronym.py b/modules/acronym.py
index 212465d5..116317a1 100644
--- a/modules/acronym.py
+++ b/modules/acronym.py
@@ -9,11 +9,13 @@ class Module(ModuleManager.BaseModule):
@utils.kwarg("usage", "<acronym>")
def acronym(self, event):
query = event["args_split"][0].upper()
- response = utils.http.request(API % query, parse=True)
- if response.data:
- acronyms = []
- for element in response.data.find_all("acro"):
- acronyms.append(element.expan.string)
+ response = utils.http.request(API % query)
+
+ acronyms = []
+ for element in response.soup().find_all("acro"):
+ acronyms.append(element.expan.string)
+
+ if acronyms:
event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
else:
raise utils.EventResultsError()
diff --git a/modules/eval_lua.py b/modules/eval_lua.py
index 102aadc7..6b34c7c2 100644
--- a/modules/eval_lua.py
+++ b/modules/eval_lua.py
@@ -10,15 +10,14 @@ class Module(ModuleManager.BaseModule):
@utils.hook("received.command.lua", min_args=1)
def eval(self, event):
try:
- page = utils.http.request(EVAL_URL,
- post_data={"input": event["args"]},
- method="POST", parse=True)
+ page = utils.http.request(EVAL_URL, post_data=
+ {"input": event["args"]}, method="POST")
except socket.timeout:
raise utils.EventError("%s: eval timed out" %
event["user"].nickname)
if page:
- textareas = page.data.find_all("textarea")
+ textareas = page.soup().find_all("textarea")
if len(textareas) > 1:
out = textareas[1].text.strip("\n")
event["stdout"].write("%s: %s" % (event["user"].nickname, out))
diff --git a/modules/fediverse/ap_utils.py b/modules/fediverse/ap_utils.py
index fc6ea5fd..686b8850 100644
--- a/modules/fediverse/ap_utils.py
+++ b/modules/fediverse/ap_utils.py
@@ -38,8 +38,7 @@ class FindActorException(Exception):
def find_actor(username, instance):
hostmeta = HOSTMETA_TEMPLATE % instance
- hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance,
- parse=True, check_content_type=False)
+ hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
try:
hostmeta = utils.http.request(hostmeta_request)
except:
@@ -47,7 +46,7 @@ def find_actor(username, instance):
webfinger_url = None
if hostmeta.code == 200:
- for item in hostmeta.data.find_all("link"):
+ for item in hostmeta.soup().find_all("link"):
if item["rel"] and item["rel"][0] == "lrdd":
webfinger_url = item["template"]
break
diff --git a/modules/title.py b/modules/title.py
index d02f7249..01425dbe 100644
--- a/modules/title.py
+++ b/modules/title.py
@@ -50,14 +50,16 @@ class Module(ModuleManager.BaseModule):
return -1, None
try:
- page = utils.http.request(url, parse=True)
- except utils.http.HTTPWrongContentTypeException:
- return -1, None
+ page = utils.http.request(url)
except Exception as e:
self.log.error("failed to get URL title for %s: %s", [url, str(e)])
return -1, None
- if page.data.title:
+ if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
+ return -1, None
+ page = page.soup()
+
+ if page.title:
title = utils.parse.line_normalise(page.data.title.text)
if not title:
return -3, None
diff --git a/src/utils/http.py b/src/utils/http.py
index 9e9bf3e6..80bf5eae 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -72,9 +72,7 @@ class Request(object):
allow_redirects: bool = True
check_content_type: bool = True
- parse: bool = False
detect_encoding: bool = True
- parser: str = "lxml"
fallback_encoding: typing.Optional[str] = None
content_type: typing.Optional[str] = None
proxy: typing.Optional[str] = None
@@ -126,8 +124,12 @@ class Response(object):
self.encoding = encoding
self.headers = headers
self.cookies = cookies
- def json(self):
+ def decode(self) -> str:
+ return self.data
+ def json(self) -> typing.Any:
return _json.loads(self.data)
+ def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
+ return bs4.BeautifulSoup(self.decode(), parser)
def _meta_content(s: str) -> typing.Dict[str, str]:
out = {}
@@ -200,23 +202,12 @@ def _request(request_obj: Request) -> Response:
if (request_obj.detect_encoding and
response.content_type and
response.content_type in SOUP_CONTENT_TYPES):
- souped = bs4.BeautifulSoup(response.data, request_obj.parser)
+ souped = bs4.BeautifulSoup(response.data, "lxml")
encoding = _find_encoding(souped) or encoding
def _decode_data():
return response.data.decode(encoding)
- if request_obj.parse:
- if (not request_obj.check_content_type or
- response.content_type in SOUP_CONTENT_TYPES):
- souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser)
- response.data = souped
- return response
- else:
- raise HTTPWrongContentTypeException(
- "Tried to soup non-html/non-xml data (%s)" %
- response.content_type)
-
if request_obj.json and response.data:
data = _decode_data()
try: