diff options
| author | 2018-11-20 13:47:38 +0000 | |
|---|---|---|
| committer | 2018-11-20 13:47:38 +0000 | |
| commit | 537e2eebc4e4f88445012d04ee40f067e4c429fa (patch) | |
| tree | 0d9167af0f3d0b7f535058ce8055109dda8b318c | |
| parent | Add a point to a message's score for each additional script they use (diff) | |
| signature | ||
Show reasons for score points (mixed_unicode.py)
| -rw-r--r-- | modules/mixed_unicode.py | 26 |
1 file changed, 20 insertions, 6 deletions
diff --git a/modules/mixed_unicode.py b/modules/mixed_unicode.py index 3bfcaf91..5277c8b3 100644 --- a/modules/mixed_unicode.py +++ b/modules/mixed_unicode.py @@ -1,4 +1,4 @@ -import enum +import enum, itertools from src import ModuleManager, utils class Script(enum.Enum): @@ -11,6 +11,12 @@ class Script(enum.Enum): Coptic = 6 Cherokee = 7 TaiLe = 8 + +class ScoreReason(enum.Enum): + ScriptChange = 0 + ScriptChangeInWord = 1 + AdditionalScript = 2 + WORD_SEPERATORS = [",", " ", "\t", "."] SCORE_LENGTH = 100 @@ -48,7 +54,7 @@ class Module(ModuleManager.BaseModule): def channel_message(self, event): last_script = None last_was_separator = False - score = 0 + score_reasons = [] scripts = set([]) for char in event["message"]: @@ -59,15 +65,23 @@ class Module(ModuleManager.BaseModule): scripts.add(script) if not script == Script.Unknown: if last_script and not script == last_script: - score += 1 + score_reasons.append(ScoreReason.ScriptChange) if not last_was_separator: - score += 1 + score_reasons.append(ScoreReason.ScriptChangeInWord) last_script = script last_was_separator = False if len(scripts) > 1: - score += len(scripts)-1 + score_reasons.extend([ScoreReason.AdditonalScript]*(len(scripts)-1)) + + score = len(score_reasons) + reasons_s = [] + for reason, group in itertools.groupby(score_reasons): + reasons_s.append("%s: %s" % (reason, len(list(group)))) + if score > 0: - self.log.trace("Message given a mixed-unicode score of %s", [score]) + self.log.trace( + "Message given a mixed-unicode score of %s (reasons: %s)", + [score, ", ".join(reasons_s)]) |
