aboutsummaryrefslogtreecommitdiff
path: root/modules/rss.py
diff options
context:
space:
mode:
author: jesopo, 2019-08-12 15:08:36 +0100
committer: jesopo, 2019-08-12 15:08:36 +0100
commit: ff8b81a7610253cf6fdf856bf0fe1cda2287008f (patch)
tree: 6a28b275832ec6c052d033a05e434dcc4c84b6ea /modules/rss.py
parent: throttle negative and positive karma independently of each other (diff)
signature
Store more "seen ids", hash seen ids so they take up a constant space
Diffstat (limited to 'modules/rss.py')
-rw-r--r-- modules/rss.py 16
1 files changed, 9 insertions, 7 deletions
diff --git a/modules/rss.py b/modules/rss.py
index ed2cc044..3aaf6c36 100644
--- a/modules/rss.py
+++ b/modules/rss.py
@@ -1,7 +1,7 @@
#--depends-on config
#--depends-on shorturl
-import time
+import hashlib, time
from src import ModuleManager, utils
import feedparser
@@ -63,15 +63,14 @@ class Module(ModuleManager.BaseModule):
feed = feedparser.parse(pages[url].data)
feed_title = feed["feed"].get("title", None)
+ max_ids = len(feed["entries"])*10
for server, channel in channels:
seen_ids = channel.get_setting("rss-seen-ids-%s" % url, [])
- new_ids = []
valid = 0
for entry in feed["entries"][::-1]:
- entry_id = entry.get("id", entry["link"])
+ entry_id = self._get_id(entry)
if entry_id in seen_ids:
- new_ids.append(entry_id)
continue
if valid == 3:
@@ -84,15 +83,18 @@ class Module(ModuleManager.BaseModule):
self.events.on("send.stdout").call(target=channel,
module_name="RSS", server=server, message=output)
- new_ids.append(entry_id)
+ seen_ids.append(entry_id)
- channel.set_setting("rss-seen-ids-%s" % url, new_ids)
+ if len(seen_ids) > max_ids:
+ seen_ids = seen_ids[len(seen_ids)-max_ids:]
+ channel.set_setting("rss-seen-ids-%s" % url, seen_ids)
total_milliseconds = (time.monotonic() - start_time) * 1000
self.log.trace("Polled RSS feeds in %fms", [total_milliseconds])
def _get_id(self, entry):
- return entry.get("id", entry["link"])
+ return "sha1:%s" % hashlib.sha1(entry.get("id", entry["link"]
+ ).encode("utf8")).hexdigest()
def _get_entries(self, url, max: int=None):
try: