diff options
| author | 2018-10-10 13:41:58 +0100 | |
|---|---|---|
| committer | 2018-10-10 13:41:58 +0100 | |
| commit | 68f5626189c50c2429ffbf2c40a7807f6697f914 (patch) | |
| tree | fdc5f331bd173de8da4aa625076603909f096366 /src/utils | |
| parent | Support coin bets without a whole number (e.g. '0.1' can be '.1') (diff) | |
| signature | ||
Change utils.http to use requests
Diffstat (limited to 'src/utils')
| -rw-r--r-- | src/utils/http.py | 113 |
1 file changed, 30 insertions, 83 deletions
```diff
diff --git a/src/utils/http.py b/src/utils/http.py
index 8a2832b4..0a0665d5 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -1,102 +1,49 @@
-import re, traceback, urllib.error, urllib.parse, urllib.request
-import json, ssl
-import bs4
+import re, traceback, urllib.error, urllib.parse
+import json as _json
+import bs4, requests
 
 USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
 REGEX_HTTP = re.compile("https?://", re.I)
 
-def get_url(url, **kwargs):
+def get_url(url, method="GET", get_params={}, post_data=None, headers={},
+        json_data=None, code=False, json=False, soup=False, parser="lxml"):
+
     if not urllib.parse.urlparse(url).scheme:
         url = "http://%s" % url
-    url_parsed = urllib.parse.urlparse(url)
-
-    method = kwargs.get("method", "GET")
-    get_params = kwargs.get("get_params", "")
-    post_params = kwargs.get("post_params", None)
-    post_data = kwargs.get("post_data", None)
-    headers = kwargs.get("headers", {})
-    return_code = kwargs.get("code", False)
-
-    if get_params:
-        get_params = "?%s" % urllib.parse.urlencode(get_params)
-    if post_params:
-        post_data = urllib.parse.urlencode(post_params)
-    url = "%s%s" % (url, get_params)
 
-    try:
-        url.encode("latin-1")
-        if post_data:
-            post_data = post_data.encode("utf8")
-    except UnicodeEncodeError:
-        if return_code:
-            return 0, False
-        return False
+    if not "Accept-Language" in headers:
+        headers["Accept-Language"] = "en-GB"
+    if not "User-Agent" in headers:
+        headers["User-Agent"] = USER_AGENT
 
-    request = urllib.request.Request(url, post_data)
-    request.add_header("Accept-Language", "en-US")
-    request.add_header("User-Agent", USER_AGENT)
-    for header, value in headers.items():
-        request.add_header(header, value)
-    request.method = method
+    response = requests.request(
+        method.upper(),
+        url,
+        headers=headers,
+        params=get_params,
+        data=post_data,
+        json=json_data
+    )
 
-    try:
-        response = urllib.request.urlopen(request, timeout=5)
-    except urllib.error.HTTPError as e:
-        traceback.print_exc()
-        if return_code:
-            return e.code, False
-        return False
-    except urllib.error.URLError as e:
-        traceback.print_exc()
-        if kwargs.get("code"):
-            return -1, False
-        return False
-    except ssl.CertificateError as e:
-        traceback.print_exc()
-        if return_code:
-            return -1, False,
-        return False
-
-    response_content = response.read()
-    encoding = response.info().get_content_charset()
-    if kwargs.get("soup"):
-        soup = bs4.BeautifulSoup(response_content, kwargs.get("parser",
-            "lxml"))
-        if return_code:
+    if soup:
+        soup = bs4.BeautifulSoup(response.text, parser)
+        if code:
             return response.code, soup
         return soup
 
-    if not encoding:
-        soup = bs4.BeautifulSoup(response_content, kwargs.get("parser", "lxml"))
-        metas = soup.find_all("meta")
-        for meta in metas:
-            if "charset=" in meta.get("content", ""):
-                encoding = meta.get("content").split("charset=", 1)[1
-                    ].split(";", 1)[0]
-            elif meta.get("charset", ""):
-                encoding = meta.get("charset")
-            else:
-                continue
-            break
-        if not encoding:
-            for item in soup.contents:
-                if isinstance(item, bs4.Doctype):
-                    if item == "html":
-                        encoding = "utf8"
-                    else:
-                        encoding = "latin-1"
-                    break
-    response_content = response_content.decode(encoding or "utf8")
-    data = response_content
-    if kwargs.get("json") and data:
+    data = response.text
+    if json and data:
         try:
-            data = json.loads(response_content)
-        except json.decoder.JSONDecodeError:
+            data = _json.loads(data)
+        except _json.decoder.JSONDecodeError:
             traceback.print_exc()
+            if code:
+                return 0, False
             return False
-    if kwargs.get("code"):
-        return response.code, data
+
+    if code:
+        return response.status_code, data
     else:
         return data
 
```
