aboutsummaryrefslogtreecommitdiff
path: root/modules/title.py
diff options
context:
space:
mode:
authorGravatar jesopo2019-10-04 11:20:35 +0100
committerGravatar jesopo2019-10-04 11:20:35 +0100
commit8e4c0f4963841b6dd31d11ba46335de231e61467 (patch)
tree0d0f8aa28d0d5194c5889daefdba405ad3450277 /modules/title.py
parent: don't auto-title when a URL contains most of its <title> (diff)
signature
ignore one-char "words" in <title> if they're not a "letter"
Diffstat (limited to 'modules/title.py')
-rw-r--r--modules/title.py9
1 files changed, 7 insertions, 2 deletions
diff --git a/modules/title.py b/modules/title.py
index d23696e6..b3692de3 100644
--- a/modules/title.py
+++ b/modules/title.py
@@ -21,14 +21,19 @@ class Module(ModuleManager.BaseModule):
def _different(self, url, title):
url = url.lower()
- title_words = [word.lower() for word in title.split()]
+ title_words = []
+ for title_word in title.split():
+ if len(title_word) > 1 or title_word.isalpha():
+ title_words.append(title_word.lower())
+
present = 0
for title_word in title_words:
if title_word in url:
present += 1
+ similarity = present/len(title_words)
# if at least 80% of words are in the URL, too similar
- if (present/len(title_words)) >= 0.8:
+ if similarity >= 0.8:
return False
return True