From 8e4c0f4963841b6dd31d11ba46335de231e61467 Mon Sep 17 00:00:00 2001
From: jesopo
Date: Fri, 4 Oct 2019 11:20:35 +0100
Subject: ignore one-char "words" in <title> if they're not a "letter"
---
modules/title.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/title.py b/modules/title.py
index d23696e6..b3692de3 100644
--- a/modules/title.py
+++ b/modules/title.py
@@ -21,14 +21,19 @@ class Module(ModuleManager.BaseModule):
def _different(self, url, title):
url = url.lower()
- title_words = [word.lower() for word in title.split()]
+ title_words = []
+ for title_word in title.split():
+ if len(title_word) > 1 or title_word.isalpha():
+ title_words.append(title_word.lower())
+
present = 0
for title_word in title_words:
if title_word in url:
present += 1
+ similarity = present/len(title_words)
# if at least 80% of words are in the URL, too similar
- if (present/len(title_words)) >= 0.8:
+ if similarity >= 0.8:
return False
return True
--
cgit v1.3.1-10-gc9f91