diff options
| -rw-r--r-- | wikibot/wbmexclusions.py | 17 |
1 files changed, 7 insertions, 10 deletions
diff --git a/wikibot/wbmexclusions.py b/wikibot/wbmexclusions.py index f97adae..8bdc1a9 100644 --- a/wikibot/wbmexclusions.py +++ b/wikibot/wbmexclusions.py @@ -1,14 +1,11 @@ import collections -import mwclient +import pywikibot import re -import os - extractionPattern = re.compile('[^:/]+://(?:www\\.)?(\\S+)') countMarkBegin = '<!-- atwikibot:urlCount -->' countMarkEnd = '<!-- /atwikibot:urlCount -->' - def get_cleaned_url(line): # Extract the domain from the first URL appearing on a line, stripping away a leading "www." if any; returns None if no URL is found match = extractionPattern.search(line) @@ -19,7 +16,7 @@ def get_cleaned_url(line): def handle_page(site, page): # Extract domains from lines - entries = collections.deque((line, get_cleaned_url(line)) for line in page.text().split('\n')) + entries = collections.deque((line, get_cleaned_url(line)) for line in page.text.split('\n')) # Identify blocks of URLs and sort them entries.append((None, None)) # Dummy entry at the end to trigger a last sorting if necessary @@ -49,13 +46,13 @@ def handle_page(site, page): outputStr = outputStr[:countMarkBeginPos] + countMarkBegin + 'This list currently contains ' + str(urlCount) + ' URL' + ('s' if urlCount != 1 else '') + '.' + countMarkEnd + outputStr[countMarkEndPos + len(countMarkEnd):] # Update if necessary - if page.text() != outputStr: - if not site.logged_in: # Only log in when necessary - site.login(os.environ['ATWIKIBOT_USERNAME'], os.environ['ATWIKIBOT_PASSWORD']) - page.save(outputStr) + if page.text != outputStr: + site.login() + page.text = outputStr + page.save("Rendered from template") def main(): - site = mwclient.Site('wiki.archiveteam.org', path = '/') + site = pywikibot.Site() for page in site.allpages(prefix = 'List of websites excluded from the Wayback Machine'): handle_page(site, page) |
