aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar jesopo2019-07-02 14:10:18 +0100
committerGravatar jesopo2019-07-02 14:10:18 +0100
commit534854127be47f8892c3f3952d779d31014452df (patch)
tree635d6c3b6fbc7593da11336985e1a948188e0fe8
parentmulitline-concat shouldn't be a c2c tag (diff)
signature
Add utils.http.url_validate() for best-effort url tidying
-rw-r--r--src/utils/http.py16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/utils/http.py b/src/utils/http.py
index 88555568..e65e1e23 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -6,6 +6,22 @@ from src import utils
# Matches an http:// or https:// URL: the scheme followed by one or more
# characters drawn from letters, digits and the punctuation set passed to
# re.escape() below. re.I makes the whole pattern case-insensitive.
REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/%?#[]@!$&'()*+,;=")), re.I)
+# best-effort tidying up of URLs
def url_validate(url: str) -> str:
    """Best-effort tidy-up of a URL pulled out of free text.

    Drops a single trailing ")" when no earlier "(" is left open to pair
    with it; in every other case the URL is returned unchanged.

        google.com/)    -> google.com/
        google.com/()   -> google.com/()
        google.com/())  -> google.com/()
        google.com/(()) -> google.com/(())

    Args:
        url: the candidate URL text.

    Returns:
        ``url`` itself, or ``url`` without its final ")" when that
        parenthesis has no matching "(".
    """
    if not url.endswith(")"):
        return url

    # Count the "(" that are still open just before the final character.
    # A ")" seen while nothing is open is ignored instead of driving the
    # count negative, so a stray ")" early in the URL cannot cancel a
    # later "(".
    open_count = 0
    for char in url[:-1]:
        if char == "(":
            open_count += 1
        elif char == ")" and open_count:
            open_count -= 1

    if open_count:
        # the trailing ")" closes an open "(" - it belongs to the URL
        return url
    return url[:-1]
+
# Browser-style identification string (Chrome 49 on 64-bit Windows 7), built
# from two adjacent string literals that Python concatenates into one.
# NOTE(review): presumably sent as the User-Agent header on outgoing HTTP
# requests - the code that uses it is outside this view; confirm.
USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")