diff options
| author | 2025-12-08 19:55:49 +0100 | |
|---|---|---|
| committer | 2025-12-08 20:00:43 +0100 | |
| commit | 5b79a9cf5530a835ff800ae6bcb05df5b73ec600 (patch) | |
| tree | a481b9f39daf7af730a9dac83b6b0919f0091841 /wikibot | |
| parent | user-config: set encoding to UTF-8 (diff) | |
| signature | ||
wikibot(wbmexclusions): use pywikibot
Diffstat (limited to 'wikibot')
| -rw-r--r-- | wikibot/wbmexclusions.py | 17 |
1 file changed, 7 insertions, 10 deletions
diff --git a/wikibot/wbmexclusions.py b/wikibot/wbmexclusions.py index f97adae..8bdc1a9 100644 --- a/wikibot/wbmexclusions.py +++ b/wikibot/wbmexclusions.py @@ -1,14 +1,11 @@ import collections -import mwclient +import pywikibot import re -import os - extractionPattern = re.compile('[^:/]+://(?:www\\.)?(\\S+)') countMarkBegin = '<!-- atwikibot:urlCount -->' countMarkEnd = '<!-- /atwikibot:urlCount -->' - def get_cleaned_url(line): # Extract the domain from the first URL appearing on a line, stripping away a leading "www." if any; returns None if no URL is found match = extractionPattern.search(line) @@ -19,7 +16,7 @@ def get_cleaned_url(line): def handle_page(site, page): # Extract domains from lines - entries = collections.deque((line, get_cleaned_url(line)) for line in page.text().split('\n')) + entries = collections.deque((line, get_cleaned_url(line)) for line in page.text.split('\n')) # Identify blocks of URLs and sort them entries.append((None, None)) # Dummy entry at the end to trigger a last sorting if necessary @@ -49,13 +46,13 @@ def handle_page(site, page): outputStr = outputStr[:countMarkBeginPos] + countMarkBegin + 'This list currently contains ' + str(urlCount) + ' URL' + ('s' if urlCount != 1 else '') + '.' + countMarkEnd + outputStr[countMarkEndPos + len(countMarkEnd):] # Update if necessary - if page.text() != outputStr: - if not site.logged_in: # Only log in when necessary - site.login(os.environ['ATWIKIBOT_USERNAME'], os.environ['ATWIKIBOT_PASSWORD']) - page.save(outputStr) + if page.text != outputStr: + site.login() + page.text = outputStr + page.save("Rendered from template") def main(): - site = mwclient.Site('wiki.archiveteam.org', path = '/') + site = pywikibot.Site() for page in site.allpages(prefix = 'List of websites excluded from the Wayback Machine'): handle_page(site, page) |
