aboutsummaryrefslogtreecommitdiff
path: root/src/utils
diff options
context:
space:
mode:
author jesopo 2018-10-10 13:41:58 +0100
committer jesopo 2018-10-10 13:41:58 +0100
commit 68f5626189c50c2429ffbf2c40a7807f6697f914 (patch)
tree fdc5f331bd173de8da4aa625076603909f096366 /src/utils
parent Support coin bets without a whole number (e.g. '0.1' can be '.1') (diff)
signature
Change utils.http to use requests
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/http.py 113
1 files changed, 30 insertions, 83 deletions
diff --git a/src/utils/http.py b/src/utils/http.py
index 8a2832b4..0a0665d5 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -1,102 +1,49 @@
-import re, traceback, urllib.error, urllib.parse, urllib.request
-import json, ssl
-import bs4
+import re, traceback, urllib.error, urllib.parse
+import json as _json
+import bs4, requests
USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
REGEX_HTTP = re.compile("https?://", re.I)
-def get_url(url, **kwargs):
+def get_url(url, method="GET", get_params={}, post_data=None, headers={},
+ json_data=None, code=False, json=False, soup=False, parser="lxml"):
+
if not urllib.parse.urlparse(url).scheme:
url = "http://%s" % url
- url_parsed = urllib.parse.urlparse(url)
-
- method = kwargs.get("method", "GET")
- get_params = kwargs.get("get_params", "")
- post_params = kwargs.get("post_params", None)
- post_data = kwargs.get("post_data", None)
- headers = kwargs.get("headers", {})
- return_code = kwargs.get("code", False)
-
- if get_params:
- get_params = "?%s" % urllib.parse.urlencode(get_params)
- if post_params:
- post_data = urllib.parse.urlencode(post_params)
- url = "%s%s" % (url, get_params)
- try:
- url.encode("latin-1")
- if post_data:
- post_data = post_data.encode("utf8")
- except UnicodeEncodeError:
- if return_code:
- return 0, False
- return False
+ if not "Accept-Language" in headers:
+ headers["Accept-Language"] = "en-GB"
+ if not "User-Agent" in headers:
+ headers["User-Agent"] = USER_AGENT
- request = urllib.request.Request(url, post_data)
- request.add_header("Accept-Language", "en-US")
- request.add_header("User-Agent", USER_AGENT)
- for header, value in headers.items():
- request.add_header(header, value)
- request.method = method
+ response = requests.request(
+ method.upper(),
+ url,
+ headers=headers,
+ params=get_params,
+ data=post_data,
+ json=json_data
+ )
- try:
- response = urllib.request.urlopen(request, timeout=5)
- except urllib.error.HTTPError as e:
- traceback.print_exc()
- if return_code:
- return e.code, False
- return False
- except urllib.error.URLError as e:
- traceback.print_exc()
- if kwargs.get("code"):
- return -1, False
- return False
- except ssl.CertificateError as e:
- traceback.print_exc()
- if return_code:
- return -1, False,
- return False
-
- response_content = response.read()
- encoding = response.info().get_content_charset()
- if kwargs.get("soup"):
- soup = bs4.BeautifulSoup(response_content, kwargs.get("parser",
- "lxml"))
- if return_code:
+ if soup:
+ soup = bs4.BeautifulSoup(response.text, parser)
+ if code:
return response.code, soup
return soup
- if not encoding:
- soup = bs4.BeautifulSoup(response_content, kwargs.get("parser", "lxml"))
- metas = soup.find_all("meta")
- for meta in metas:
- if "charset=" in meta.get("content", ""):
- encoding = meta.get("content").split("charset=", 1)[1
- ].split(";", 1)[0]
- elif meta.get("charset", ""):
- encoding = meta.get("charset")
- else:
- continue
- break
- if not encoding:
- for item in soup.contents:
- if isinstance(item, bs4.Doctype):
- if item == "html":
- encoding = "utf8"
- else:
- encoding = "latin-1"
- break
- response_content = response_content.decode(encoding or "utf8")
- data = response_content
- if kwargs.get("json") and data:
+ data = response.text
+ if json and data:
try:
- data = json.loads(response_content)
- except json.decoder.JSONDecodeError:
+ data = _json.loads(data)
+ except _json.decoder.JSONDecodeError:
traceback.print_exc()
+ if code:
+ return 0, False
return False
- if kwargs.get("code"):
- return response.code, data
+
+ if code:
+ return response.status_code, data
else:
return data