diff options
| author | 2018-10-10 23:49:42 +0100 | |
|---|---|---|
| committer | 2018-10-10 23:49:59 +0100 | |
| commit | c655668bbec83445776928d2be3eabcdd9cf0028 (patch) | |
| tree | a7a7044a38ad7e5498441fbfabec6d10fb557aaf /src | |
| parent | randbelow takes one argument (diff) | |
| signature | ||
Add a fallback_encoding parameter (default "utf8") to utils.http.get_url, used to decode the response body when the response reports no encoding of its own
Diffstat (limited to 'src')
| -rw-r--r-- | src/utils/http.py | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/src/utils/http.py b/src/utils/http.py index d8b96dc7..a91808ed 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -19,7 +19,8 @@ def throw_timeout(): raise HTTPTimeoutException() def get_url(url, method="GET", get_params={}, post_data=None, headers={}, - json_data=None, code=False, json=False, soup=False, parser="lxml"): + json_data=None, code=False, json=False, soup=False, parser="lxml", + fallback_encoding="utf8"): if not urllib.parse.urlparse(url).scheme: url = "http://%s" % url @@ -53,7 +54,7 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={}, return response.code, soup return soup - data = response_content.decode(response.encoding) + data = response_content.decode(response.encoding or fallback_encoding) if json and data: try: data = _json.loads(data) |
