diff options
| author | 2019-09-02 18:28:40 +0100 | |
|---|---|---|
| committer | 2019-09-02 18:28:40 +0100 | |
| commit | 24a510d1cae893d6bce07b6216ec398937e4584c (patch) | |
| tree | 79f158a9101988f10fac06d0dd1509ccd3506a7e /modules/mixed_unicode.py | |
| parent | remove strax.py - no one uses it (diff) | |
| signature | ||
move mixed_unicode.py to bitbot-modules
Diffstat (limited to 'modules/mixed_unicode.py')
| -rw-r--r-- | modules/mixed_unicode.py | 87 |
1 files changed, 0 insertions, 87 deletions
```diff
diff --git a/modules/mixed_unicode.py b/modules/mixed_unicode.py
deleted file mode 100644
index 057b971a..00000000
--- a/modules/mixed_unicode.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import collections, enum
-from src import ModuleManager, utils
-
-class Script(enum.Enum):
-    Unknown = 0
-    Latin = 1
-    Cyrillic = 2
-    Greek = 3
-    Armenian = 4
-    FullWidth = 5
-    Coptic = 6
-    Cherokee = 7
-    TaiLe = 8
-
-class ScoreReason(enum.Enum):
-    ScriptChange = 0
-    ScriptChangeInWord = 1
-    AdditionalScript = 2
-
-WORD_SEPERATORS = [",", " ", "\t", "."]
-
-SCORE_LENGTH = 100
-
-class Module(ModuleManager.BaseModule):
-    def _detect_script(self, char):
-        point = ord(char)
-        # NULL .. LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
-        if 0x0000 <= point <= 0x02AF:
-            return Script.Latin
-        # GREEK CAPITAL LETTER HETA .. GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
-        elif 0x0370 <= point <= 0x03ff:
-            return Script.Greek
-        # CYRILLIC CAPITAL LETTER IE WITH GRAVE .. CYRILLIC SMALL LETTER EL WITH DESCENDER
-        elif 0x0400 <= point <= 0x052F:
-            return Script.Cyrillic
-        # ARMENIAN CAPITAL LETTER AYB .. ARMENIAN HYPHEN
-        elif 0x0531 <= point <= 0x058A:
-            return Script.Armenian
-        # FULLWIDTH EXCLAMATION MARK .. FULLWIDTH RIGHT WHITE PARENTHESIS
-        elif 0xFF01 <= point <= 0xff60:
-            return Script.FullWidth
-        # COPTIC CAPITAL LETTER ALFA .. COPTIC MORPHOLOGICAL DIVIDER
-        elif 0x2C80 <= point <= 0x2CFF:
-            return Script.Coptic
-        # CHEROKEE LETTER A .. CHEROKEE SMALL LETTER MV
-        elif 0x13A0 <= point <= 0x13FD:
-            return Script.Cherokee
-        # TAI LE LETTER KA .. U+197F
-        elif 0x1950 <= point <= 0x197F:
-            return Script.TaiLe
-        return Script.Unknown
-
-    @utils.hook("received.message.channel")
-    def channel_message(self, event):
-        last_script = None
-        last_was_separator = False
-        reasons = []
-        scripts = set([])
-
-        for char in event["message"]:
-            if char in WORD_SEPERATORS:
-                last_was_separator = True
-            else:
-                script = self._detect_script(char)
-                if not script == Script.Unknown:
-                    scripts.add(script)
-                    if last_script and not script == last_script:
-                        reasons.append(ScoreReason.ScriptChange)
-                        if not last_was_separator:
-                            reasons.append(ScoreReason.ScriptChangeInWord)
-
-                    last_script = script
-
-                last_was_separator = False
-
-        if len(scripts) > 1:
-            reasons.extend([ScoreReason.AdditionalScript]*(len(scripts)-1))
-
-        score = len(reasons)
-        if score > 0:
-            reasons_s = []
-            for reason, count in collections.Counter(reasons).items():
-                reasons_s.append("%s: %s" % (reason, count))
-
-            self.log.trace(
-                "Message given a mixed-unicode score of %s (reasons: %s)",
-                [score, ", ".join(reasons_s)])
```
