diff options
Diffstat (limited to 'src/utils/http.py')
| -rw-r--r-- | src/utils/http.py | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/src/utils/http.py b/src/utils/http.py index d47f01c0..5261e955 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -1,4 +1,4 @@ -import re, traceback, urllib.error, urllib.parse +import re, signal, traceback, urllib.error, urllib.parse import json as _json import bs4, requests @@ -8,6 +8,14 @@ REGEX_HTTP = re.compile("https?://", re.I) RESPONSE_MAX = (1024*1024)*100 +class HTTPException: + pass +class HTTPTimeoutException(HTTPException): + pass +class HTTPParsingException(HTTPException): + pass + + def get_url(url, method="GET", get_params={}, post_data=None, headers={}, json_data=None, code=False, json=False, soup=False, parser="lxml"): @@ -19,16 +27,23 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={}, if not "User-Agent" in headers: headers["User-Agent"] = USER_AGENT - response = requests.request( - method.upper(), - url, - headers=headers, - params=get_params, - data=post_data, - json=json_data, - stream=True - ) - response_content = response.raw.read(RESPONSE_MAX, decode_content=True) + signal.signal(signal.SIGALRM, lambda: raise TimeoutError()) + signal.alarm(5) + try: + response = requests.request( + method.upper(), + url, + headers=headers, + params=get_params, + data=post_data, + json=json_data, + stream=True + ) + response_content = response.raw.read(RESPONSE_MAX, decode_content=True) + except TimeoutError: + raise HTTPTimeoutException() + finally: + signal.signal(signal.SIGALRM, signal.SIG_IGN) if soup: soup = bs4.BeautifulSoup(response_content, parser) @@ -40,11 +55,8 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={}, if json and data: try: data = _json.loads(data) - except _json.decoder.JSONDecodeError: - traceback.print_exc() - if code: - return 0, False - return False + except _json.decoder.JSONDecodeError as e: + raise HTTPParsingException(str(e)) if code: return response.status_code, data |
