summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: klea, 2025-12-31 22:07:25 +0000
committer: klea, 2025-12-31 22:07:40 +0000
commit: b96a61ab8069e55de39a16d159453db0c6df2aa4 (patch)
tree: 3532748cd08f6dab513b8abca6b05c56c1ee5c2a
parent: pkgs(shell): add wget-lua (diff)
signature
wikibot/discourse: don't order it
-rw-r--r-- wikibot/discourse.py 63
1 files changed, 0 insertions, 63 deletions
diff --git a/wikibot/discourse.py b/wikibot/discourse.py
deleted file mode 100644
index e96a396..0000000
--- a/wikibot/discourse.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import collections
-import pywikibot
-import re
-
# Bullet that starts with a bare or bracketed URL ("* http://..." or
# "* [http://..."): captures everything after "://" up to the first whitespace.
extractionPattern = re.compile(r'(?:^\* \[?ht)[^:/]+://(\S+)')
# Bracketed external link "[scheme://address label]": captures the label.
text_version = re.compile(r'\[(?:[^\s:/]+://\S+) (.*)\]')
# Bullet followed by "({{ArchiveBot job|...}}, started DATE...)": captures the
# text before the parenthesis (group 1) and the raw date characters (group 2).
time_version = re.compile(r'\* (.*) \({{ArchiveBot job\|\S+ *}}\)?, started ([0-9-]*)(?:[,;].+)?\)')


def get_cleaned_url(line):
    """Return the sort key for one line of wiki text, or None without a URL.

    The key is a ``(date, text)`` tuple, tried in this order:

    1. Lines matching ``time_version``: ``date`` is the start date with its
       dashes removed, as an int (``2021-03-04`` -> ``20210304``), and
       ``text`` is the lower-cased text preceding the ArchiveBot job note.
       A date with no digits yields ``0`` instead of raising ``ValueError``.
    2. Lines with a labelled link ``[url label]``: ``(0, label.lower())``.
    3. Lines starting with a bulleted URL: ``(0, <part after "://">.lower())``.

    Returns None when none of the patterns match.
    """
    timed = time_version.search(line)
    if timed:
        digits = timed.group(2).replace('-', '')
        # The pattern accepts an empty date ("started )"); int('') would
        # raise, so such entries sort with the undated ones.
        return (int(digits) if digits else 0, timed.group(1).lower())

    labelled = text_version.search(line)
    if labelled:
        return (0, labelled.group(1).lower())

    bare = extractionPattern.search(line)
    if bare:
        return (0, bare.group(1).lower())

    return None
-
-
def _sort_url_blocks(lines):
    """Return *lines* with each run of consecutive URL lines sorted by key."""
    output = []
    block = []  # (line, sort key) pairs of the run currently being collected

    def flush():
        # list.sort is stable, so lines with equal keys keep their page order.
        block.sort(key=lambda entry: entry[1])
        output.extend(text for text, _ in block)
        del block[:]

    for line in lines:
        key = get_cleaned_url(line)
        if key is None:
            # A line without a URL ends the current run and is kept in place.
            flush()
            output.append(line)
        else:
            block.append((line, key))
    flush()  # the page may end in the middle of a run
    return output


def handle_page(site, page):
    """Reorder the URL lists on *page* and save it if anything changed.

    The page text is split on newlines; every maximal run of consecutive
    lines for which ``get_cleaned_url`` returns a key is sorted by that key,
    while all other lines stay where they are. ``site.login()`` and
    ``page.save()`` are only called when the resulting text differs from the
    current ``page.text``.
    """
    outputStr = '\n'.join(_sort_url_blocks(page.text.split('\n')))

    # Update if necessary
    if page.text != outputStr:
        site.login()
        page.text = outputStr
        page.save("Reordered websites")
-
def main():
    """Run handle_page over every page returned for the 'Discourse' prefix."""
    archiveteam = pywikibot.Site('en', 'ArchiveTeam')
    discourse_pages = archiveteam.allpages(prefix='Discourse')
    for wiki_page in discourse_pages:
        handle_page(archiveteam, wiki_page)


if __name__ == '__main__':
    main()