wikibot/discourse: don't order it

author: klea 2025-12-31 22:07:25 +0000
committer: klea 2025-12-31 22:07:40 +0000
commit: b96a61ab8069e55de39a16d159453db0c6df2aa4 (patch)
tree: 3532748cd08f6dab513b8abca6b05c56c1ee5c2a
parent: pkgs(shell): add wget-lua (diff)
1 files changed, 0 insertions, 63 deletions
diff --git a/wikibot/discourse.py b/wikibot/discourse.py
deleted file mode 100644
index e96a396..0000000
--- a/wikibot/discourse.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import collections
-import pywikibot
-import re
-
-extractionPattern = re.compile('(?:^\\* \\[?ht)[^:/]+://(\\S+)')
-text_version = re.compile('\\[(?:[^\\s:/]+://\\S+) (.*)\\]')
-time_version = re.compile('\\* (.*) \\({{ArchiveBot job\\|\\S+ *}}\\)?, started ([0-9-]*)(?:[,;].+)?\\)')
-
-def get_cleaned_url(line):
-    TimeMatch = time_version.search(line)
-    if TimeMatch:
-        date = ''.join(TimeMatch.group(2).split('-'))
-        url = TimeMatch.group(1).lower()
-        content = date + " " + url
-        return (int(date), url)
-    # Extract the forum name from the first [] on a line, else domain from the first URL appearing on a line; returns None if no URL is found
-    NameMatch = text_version.search(line)
-    if NameMatch:
-        return (0, NameMatch.group(1).lower())
-    URLMatch = extractionPattern.search(line)
-    if URLMatch:
-        return (0, URLMatch.group(1).lower())
-    return None
-
-
-def handle_page(site, page):
-    # Extract domains from lines
-    entries = collections.deque((line, get_cleaned_url(line)) for line in page.text.split('\n'))
-
-    # Identify blocks of URLs and sort them
-    entries.append((None, None)) # Dummy entry at the end to trigger a last sorting if necessary
-    output = []
-    currentBlock = []
-    urlCount = 0
-    while entries:
-        line, url = entries.popleft()
-        if url is None:
-            # Either a line without a URL or the dummy entry at the end
-            if currentBlock:
-                currentBlock.sort(key = lambda x: x[1])
-                output.extend(x[0] for x in currentBlock)
-                urlCount += len(currentBlock)
-                currentBlock = []
-            if line is not None: # Ignore the dummy entry
-                output.append(line)
-        elif line is not None:
-            # line and url are not None, i.e. this is a line with a URL in it
-            currentBlock.append((line, url))
-
-    outputStr = '\n'.join(output)
-
-    # Update if necessary
-    if page.text != outputStr:
-        site.login()
-        page.text = outputStr
-        page.save("Reordered websites")
-
-def main():
-    site = pywikibot.Site('en', 'ArchiveTeam')
-    for page in site.allpages(prefix = 'Discourse'):
-        handle_page(site, page)
-
-if __name__ == '__main__': main()
author	klea	2025-12-31 22:07:25 +0000
committer	klea	2025-12-31 22:07:40 +0000
commit	b96a61ab8069e55de39a16d159453db0c6df2aa4 (patch)
tree	3532748cd08f6dab513b8abca6b05c56c1ee5c2a
parent	pkgs(shell): add wget-lua (diff)
signature