diff options
| author | 2025-12-31 22:07:25 +0000 | |
|---|---|---|
| committer | 2025-12-31 22:07:40 +0000 | |
| commit | b96a61ab8069e55de39a16d159453db0c6df2aa4 (patch) | |
| tree | 3532748cd08f6dab513b8abca6b05c56c1ee5c2a | |
| parent | pkgs(shell): add wget-lua (diff) | |
| signature | ||
wikibot/discourse: don't order it
| -rw-r--r-- | wikibot/discourse.py | 63 |
1 files changed, 0 insertions, 63 deletions
"""Sort the website lists on the ArchiveTeam wiki pages prefixed 'Discourse'.

Each page is read line by line.  Consecutive lines that contain a
recognisable entry (see ``get_cleaned_url``) form a block; every block is
sorted in place while all other lines keep their position.
"""
# Source: wikibot/discourse.py, reconstructed from the diff that removed it.

import collections
import re

# "* http://host/path" or "* [http://host/path ..." at the start of a line;
# group 1 is everything after the scheme up to the next whitespace.
extractionPattern = re.compile(r'(?:^\* \[?ht)[^:/]+://(\S+)')
# "[scheme://url label]" external link; group 1 is the label.
text_version = re.compile(r'\[(?:[^\s:/]+://\S+) (.*)\]')
# "* <entry> ({{ArchiveBot job|<id>}}, started <date>...)"; group 1 is the
# entry text, group 2 the date made of digits and dashes (possibly empty).
time_version = re.compile(
    r'\* (.*) \({{ArchiveBot job\|\S+ *}}\)?, started ([0-9-]*)(?:[,;].+)?\)'
)


def get_cleaned_url(line):
    """Return the sort key for *line*, or ``None`` if it holds no entry.

    The key is a ``(date, text)`` tuple:

    * lines matching ``time_version`` yield the start date with its dashes
      removed, as an int, plus the lower-cased entry text;
    * otherwise the label of the first ``[url label]`` link is used, else the
      part of the first URL after ``://``; both are lower-cased and paired
      with a date of 0.
    """
    time_match = time_version.search(line)
    if time_match:
        digits = time_match.group(2).replace('-', '')
        # The pattern accepts an empty date (", started )"); int('') would
        # raise ValueError and abort the whole run, so fall back to 0.
        return (int(digits) if digits else 0, time_match.group(1).lower())

    name_match = text_version.search(line)
    if name_match:
        return (0, name_match.group(1).lower())

    url_match = extractionPattern.search(line)
    if url_match:
        return (0, url_match.group(1).lower())

    return None


def _sort_url_blocks(lines):
    """Return *lines* with every run of consecutive entry lines sorted."""
    entries = collections.deque(
        (line, get_cleaned_url(line)) for line in lines
    )
    # Sentinel so that a block reaching the last line is flushed as well.
    entries.append((None, None))

    output = []
    current_block = []
    while entries:
        line, key = entries.popleft()
        if key is not None:
            current_block.append((line, key))
            continue
        # A line without an entry (or the sentinel) ends the current block.
        if current_block:
            # list.sort is stable, so entries with equal keys keep their order.
            current_block.sort(key=lambda item: item[1])
            output.extend(text for text, _ in current_block)
            current_block = []
        if line is not None:  # Skip the sentinel itself.
            output.append(line)
    return output


def handle_page(site, page):
    """Reorder the entry blocks of *page* and save it if anything changed.

    ``site.login()`` and ``page.save()`` are only called when the sorted text
    differs from the current ``page.text``.
    """
    sorted_text = '\n'.join(_sort_url_blocks(page.text.split('\n')))

    if page.text != sorted_text:
        site.login()
        page.text = sorted_text
        page.save("Reordered websites")


def main():
    """Process every page on the wiki whose title starts with 'Discourse'."""
    # Imported here, in its only user, so the parsing and sorting functions
    # stay importable where pywikibot is not installed.
    import pywikibot

    site = pywikibot.Site('en', 'ArchiveTeam')
    for page in site.allpages(prefix='Discourse'):
        handle_page(site, page)


if __name__ == '__main__':
    main()
