aboutsummaryrefslogtreecommitdiff
path: root/src/utils
diff options
context:
space:
mode:
author jesopo 2019-09-02 13:25:48 +0100
committer jesopo 2019-09-02 13:25:48 +0100
commit 408b89aeb71212668e9227e2d23d9439b80591c5 (patch)
tree f5dc49cec4cb3f79ad42b64781e93a0a35ba3e8e /src/utils
parent Show mumble server version (diff)
signature
use \S+ for url regex (for non-ascii chars), use url_sanitise to catch <>
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/http.py 21
1 files changed, 12 insertions, 9 deletions
diff --git a/src/utils/http.py b/src/utils/http.py
index c2ecf35d..e4ec5fe9 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -5,25 +5,28 @@ import bs4, netifaces, requests
import tornado.httpclient
from src import utils
-REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/%?#[]@!$&'()*+,;=")), re.I)
+REGEX_URL = re.compile("https?://\S+", re.I)
+
+PAIRED_CHARACTERS = ["<>", "()"]
# best-effort tidying up of URLs
def url_sanitise(url: str):
if not urllib.parse.urlparse(url).scheme:
url = "http://%s" % url
- if url.endswith(")"):
+ for pair_start, pair_end in PAIRED_CHARACTERS:
# trim ")" from the end only if there's not a "(" to match it
# google.com/) -> google.com/
# google.com/() -> google.com/()
# google.com/()) -> google.com/()
-
- if "(" in url:
- open_index = url.rfind("(")
- other_index = url.rfind(")", 0, len(url)-1)
- if other_index == -1 or other_index < open_index:
- return url
- return url[:-1]
+ if url.endswith(pair_end):
+ if pair_start in url:
+ open_index = url.rfind("(")
+ other_index = url.rfind(")", 0, len(url)-1)
+ if not other_index == -1 and other_index < open_index:
+ url = url[:-1]
+ else:
+ url = url[:-1]
return url
USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "