aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: jesopo, 2018-10-10 23:49:42 +0100
committer: jesopo, 2018-10-10 23:49:59 +0100
commit: c655668bbec83445776928d2be3eabcdd9cf0028 (patch)
tree: a7a7044a38ad7e5498441fbfabec6d10fb557aaf
parent: randbelow takes one argument (diff)
signature
Add a fallback_encoding parameter to utils.http.get_url, used to decode the
page when the response does not report an encoding
-rw-r--r-- src/utils/http.py 5
1 file changed, 3 insertions, 2 deletions
diff --git a/src/utils/http.py b/src/utils/http.py
index d8b96dc7..a91808ed 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -19,7 +19,8 @@ def throw_timeout():
raise HTTPTimeoutException()
def get_url(url, method="GET", get_params={}, post_data=None, headers={},
- json_data=None, code=False, json=False, soup=False, parser="lxml"):
+ json_data=None, code=False, json=False, soup=False, parser="lxml",
+ fallback_encoding="utf8"):
if not urllib.parse.urlparse(url).scheme:
url = "http://%s" % url
@@ -53,7 +54,7 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={},
return response.code, soup
return soup
- data = response_content.decode(response.encoding)
+ data = response_content.decode(response.encoding or fallback_encoding)
if json and data:
try:
data = _json.loads(data)