aboutsummaryrefslogtreecommitdiff
path: root/modules/mixed_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/mixed_unicode.py')
-rw-r--r--modules/mixed_unicode.py26
1 files changed, 20 insertions, 6 deletions
diff --git a/modules/mixed_unicode.py b/modules/mixed_unicode.py
index 3bfcaf91..5277c8b3 100644
--- a/modules/mixed_unicode.py
+++ b/modules/mixed_unicode.py
@@ -1,4 +1,4 @@
-import enum
+import enum, itertools
from src import ModuleManager, utils
class Script(enum.Enum):
@@ -11,6 +11,12 @@ class Script(enum.Enum):
Coptic = 6
Cherokee = 7
TaiLe = 8
+
+class ScoreReason(enum.Enum):
+ ScriptChange = 0
+ ScriptChangeInWord = 1
+ AdditionalScript = 2
+
WORD_SEPERATORS = [",", " ", "\t", "."]
SCORE_LENGTH = 100
@@ -48,7 +54,7 @@ class Module(ModuleManager.BaseModule):
def channel_message(self, event):
last_script = None
last_was_separator = False
- score = 0
+ score_reasons = []
scripts = set([])
for char in event["message"]:
@@ -59,15 +65,23 @@ class Module(ModuleManager.BaseModule):
scripts.add(script)
if not script == Script.Unknown:
if last_script and not script == last_script:
- score += 1
+ score_reasons.append(ScoreReason.ScriptChange)
if not last_was_separator:
- score += 1
+ score_reasons.append(ScoreReason.ScriptChangeInWord)
last_script = script
last_was_separator = False
if len(scripts) > 1:
- score += len(scripts)-1
+ score_reasons.extend([ScoreReason.AdditionalScript]*(len(scripts)-1))
+
+ score = len(score_reasons)
+ reasons_s = []
+ for reason, group in itertools.groupby(score_reasons):
+ reasons_s.append("%s: %s" % (reason, len(list(group))))
+
if score > 0:
- self.log.trace("Message given a mixed-unicode score of %s", [score])
+ self.log.trace(
+ "Message given a mixed-unicode score of %s (reasons: %s)",
+ [score, ", ".join(reasons_s)])