format multi-line toots on a single line using things like double spaces

author: jesopo 2019-10-09 16:44:54 +0100
committer: jesopo 2019-10-09 16:44:54 +0100
commit: 84ec6d812f56b00830352f225feaf44c14ee9aac (patch)
tree: 1005eddb0a3357fdbb769681d25130104c2fd01e /modules
parent: format multi-line tweets on a single line using things like double spaces (diff)
download: bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.tar.gz
bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.tar.bz2
bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.zip
1 files changed, 24 insertions, 2 deletions
diff --git a/modules/fediverse/ap_utils.py b/modules/fediverse/ap_utils.py
index 91e1be84..c13e824b 100644
--- a/modules/fediverse/ap_utils.py
+++ b/modules/fediverse/ap_utils.py
@@ -1,4 +1,5 @@
 import urllib.parse
+import bs4
 from src import IRCBot, utils
 
 LD_TYPE = ("application/ld+json; "
@@ -56,6 +57,27 @@ def find_actor(username, instance):
         if link["type"] == ACTIVITY_TYPE:
             return link["href"]
 
+def _normalise_note(content):
+    soup = bs4.BeautifulSoup(content, "html.parser")
+    lines = []
+    for element in soup.find_all():
+        out = ""
+        if element.text.strip() == "":
+            continue
+        elif element.name == "p":
+            for subitem in element.contents:
+                if type(subitem) == bs4.element.Tag:
+                    if subitem.name == "br":
+                        lines.append(out)
+                        out = ""
+                else:
+                    out += subitem
+        else:
+            continue
+
+        lines.append(out.replace("  ", " "))
+    return "  ".join(lines)
+
 def format_note(actor, note, type="Create"):
     if type == "Announce":
         retoot_url = note
@@ -66,13 +88,13 @@ def format_note(actor, note, type="Create"):
         original_tooter = ap_actor.Actor(retoot.data["attributedTo"])
         original_tooter.load()
         retooted_user = "@%s@%s" % (original_tooter.username, retoot_instance)
-        retoot_content = utils.http.strip_html(retoot.data["content"])
+        retoot_content = _normalise_note(retoot.data["content"])
 
         return (retoot.data.get("summary", None),  "%s (boost %s): %s - %s" % (
             actor.username, retooted_user, retoot_content), retoot_url)
 
     elif type == "Create":
-        content = utils.http.strip_html(note["content"])
+        content = _normalise_note(note["content"])
         url = note.get("url", note["id"])
 
         return (note.get("summary", None),
author	jesopo	2019-10-09 16:44:54 +0100
committer	jesopo	2019-10-09 16:44:54 +0100
commit	84ec6d812f56b00830352f225feaf44c14ee9aac (patch)
tree	1005eddb0a3357fdbb769681d25130104c2fd01e /modules
parent	format multi-line tweets on a single line using things like double spaces (diff)
download	bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.tar.gz bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.tar.bz2 bitbot-84ec6d812f56b00830352f225feaf44c14ee9aac.zip