diff options
| author | 2025-12-18 00:10:33 +0000 | |
|---|---|---|
| committer | 2025-12-18 00:10:42 +0000 | |
| commit | 237dd4e55e2d139edd542e0c5eaba664b33e5fa8 (patch) | |
| tree | 95c2edd9ba465e6db4b9002d2d4984aa2ef90833 /atc-recentchanges.py | |
| parent | wiki-recentchanges: update for #at-changes (diff) | |
| signature | ||
wiki-recentchanges: move to root
Diffstat (limited to 'atc-recentchanges.py')
| -rwxr-xr-x | atc-recentchanges.py | 110 |
1 files changed, 110 insertions, 0 deletions
#!/usr/bin/env -S python3
# Polls the ArchiveTeam wiki's recent-changes API once a minute and prints one
# formatted line per reportable change to stdout (the lines are worded for IRC).
from __future__ import annotations # PEP 585 for Python 3.7 and 3.8

import logging
import time
import typing
import urllib.parse


ATW = "https://wiki.archiveteam.org/"
ATAPI = ATW + "api.php"

# Seconds to wait between two polls of the recent-changes API.
POLL_INTERVAL = 60


def get_new(
    newestRc: typing.Optional[tuple[str, int]],
) -> tuple[list[dict], tuple[typing.Optional[str], typing.Optional[int]]]:
    '''
    Fetches edits made after newestRc = (newestTimestamp, newestRcId), which shall both refer to the last change already known/processed.
    newestTimestamp is a timestamp in YYYY-MM-DDTHH:MM:SSZ format; newestRcId is the internal ID of the change.
    The return value are the list of changes after newestRcId in chronological order (as returned by MW), and a new (newestTimestamp, newestRcId) tuple.
    If newestRc is None, this returns an empty list of changes and the timestamp and rc ID of the most recent edit (for initialisation of a feed).
    If the API reports no changes at all, the cursor that was passed in is returned unchanged ((None, None) when newestRc was None).

    Raises whatever requests raises on network/HTTP failure, and KeyError if the response lacks the expected query/recentchanges structure.
    '''
    # Imported here because this is the only function that needs HTTP; the pure
    # helpers below stay importable without the third-party dependency. main()
    # calls get_new() first thing, so a missing package still fails at startup.
    import requests

    logging.info('Fetching with newestRc = %r', newestRc)
    newestTimestamp, newestRcId = newestRc if newestRc is not None else (None, None)

    # Let requests encode the query string instead of interpolating it by hand.
    params = {
        'action': 'query',
        'list': 'recentchanges',
        'rcdir': 'older',
        'format': 'json',
        'rcprop': 'user|comment|timestamp|sizes|title|flags|ids|loginfo',
        'continue': '',
        'rclimit': '500',
    }
    if newestTimestamp:
        # With rcdir=older the listing runs newest to oldest, so rcend is the
        # oldest timestamp still wanted: the last change already processed.
        params['rcend'] = newestTimestamp

    r = requests.get(ATAPI, params = params, timeout = 5)
    r.raise_for_status()
    rc = r.json()['query']['recentchanges']
    if not rc:
        return [], (newestTimestamp, newestRcId)

    changes = []
    if newestRcId is not None:
        # rc is newest-first; collect until the first already-known change.
        for c in rc:
            if c['rcid'] <= newestRcId:
                break
            changes.append(c)
    # Reverse into chronological order; rc[0] is the newest change seen.
    return changes[::-1], (rc[0]['timestamp'], rc[0]['rcid'])


def truncate(s: str, limit: int) -> str:
    '''If s is longer than limit, split on words and return a truncated version no longer than limit.

    The truncated version consists of as many leading words as fit, followed by an ellipsis character.
    If not even the first word fits, only the ellipsis is returned.
    '''
    if len(s) <= limit:
        return s
    kept = []
    used = 0
    for word in s.split(' '):
        # Each word is budgeted with one separator character, and one more
        # character is held back for the trailing ellipsis.
        cost = 1 + len(word)
        if used + cost >= limit - 1:
            break
        used += cost
        kept.append(word)
    return f'{" ".join(kept)}…'


def format_change(change: dict) -> typing.Optional[str]:
    '''Formats a change for posting to IRC.

    change is one entry of the recentchanges list returned by the MediaWiki API.
    Handled events are page creations, edits, and delete/move log entries; for the delete log, a 'restore' action is reported as restored rather than deleted.
    Returns None if it's an event that isn't handled and not to be reported to IRC, or if the entry lacks a field needed to format it (e.g. because the API omitted a suppressed user or title).
    '''
    try:
        changeType = change['type']
        title = change['title']
        if changeType == 'new':
            verb = 'created'
            url = f'{ATW}?title={urllib.parse.quote(title)}&oldid={change["revid"]}'
        elif changeType == 'edit':
            verb = 'edited'
            url = f'{ATW}?title={urllib.parse.quote(title)}&diff={change["revid"]}&oldid={change["old_revid"]}'
        elif changeType == 'log' and change['logtype'] == 'delete':
            # The delete log also records undeletions; do not announce those as deletions.
            verb = 'restored' if change.get('logaction') == 'restore' else 'deleted'
            url = ''
        elif changeType == 'log' and change['logtype'] == 'move':
            verb = 'moved'
            target = change['logparams']['target_title']
            url = f'{ATW}?title={urllib.parse.quote(target)}'
            title = f'{target} (from {title})'
        else:
            return None
        user = change['user']
        timestamp = change['timestamp']
    except (KeyError, TypeError):
        # Missing field, or logparams not being a mapping: report the entry as
        # unformattable instead of raising into the polling loop.
        return None
    comment = change.get('comment') or ''
    return f'AT Wiki | {title} was {verb} by {user} at {timestamp} | {comment} | {url}'


def main():
    '''Polls the wiki forever, printing one line per reportable change to stdout.

    Changes that format_change cannot format are logged as warnings; failures of get_new are logged and retried on the next poll.
    A failure of the very first get_new call is not caught and terminates the program.
    '''
    # Get initial TS/rcid tuple
    _, newestRc = get_new(None)
    time.sleep(POLL_INTERVAL)
    while True:
        try:
            changes, newestRc = get_new(newestRc)
        except Exception:
            # Top-level boundary of a long-running poller: log with traceback and retry next round.
            logging.exception('get_new failed')
        else:
            for change in changes:
                formatted = format_change(change)
                if formatted:
                    print(formatted, flush = True)
                else:
                    logging.warning(f'Suppressed change because it was un- or ill-formatted: {change!r}')
        time.sleep(POLL_INTERVAL)


if __name__ == '__main__':
    logging.basicConfig(
        format = '{asctime}.{msecs:03.0f} {levelname} {name} {message}',
        datefmt = '%Y-%m-%d %H:%M:%S',
        style = '{',
        level = logging.INFO,
    )
    main()
