summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wikibot/wbmexclusions.py17
1 files changed, 7 insertions, 10 deletions
diff --git a/wikibot/wbmexclusions.py b/wikibot/wbmexclusions.py
index f97adae..8bdc1a9 100644
--- a/wikibot/wbmexclusions.py
+++ b/wikibot/wbmexclusions.py
@@ -1,14 +1,11 @@
import collections
-import mwclient
+import pywikibot
import re
-import os
-
extractionPattern = re.compile('[^:/]+://(?:www\\.)?(\\S+)')
countMarkBegin = '<!-- atwikibot:urlCount -->'
countMarkEnd = '<!-- /atwikibot:urlCount -->'
-
def get_cleaned_url(line):
# Extract the domain from the first URL appearing on a line, stripping away a leading "www." if any; returns None if no URL is found
match = extractionPattern.search(line)
@@ -19,7 +16,7 @@ def get_cleaned_url(line):
def handle_page(site, page):
# Extract domains from lines
- entries = collections.deque((line, get_cleaned_url(line)) for line in page.text().split('\n'))
+ entries = collections.deque((line, get_cleaned_url(line)) for line in page.text.split('\n'))
# Identify blocks of URLs and sort them
entries.append((None, None)) # Dummy entry at the end to trigger a last sorting if necessary
@@ -49,13 +46,13 @@ def handle_page(site, page):
outputStr = outputStr[:countMarkBeginPos] + countMarkBegin + 'This list currently contains ' + str(urlCount) + ' URL' + ('s' if urlCount != 1 else '') + '.' + countMarkEnd + outputStr[countMarkEndPos + len(countMarkEnd):]
# Update if necessary
- if page.text() != outputStr:
- if not site.logged_in: # Only log in when necessary
- site.login(os.environ['ATWIKIBOT_USERNAME'], os.environ['ATWIKIBOT_PASSWORD'])
- page.save(outputStr)
+ if page.text != outputStr:
+ site.login()
+ page.text = outputStr
+ page.save("Rendered from template")
def main():
- site = mwclient.Site('wiki.archiveteam.org', path = '/')
+ site = pywikibot.Site()
for page in site.allpages(prefix = 'List of websites excluded from the Wayback Machine'):
handle_page(site, page)