diff options
| author | 2019-02-26 11:18:50 +0000 | |
|---|---|---|
| committer | 2019-02-26 11:18:50 +0000 | |
| commit | cfaf6864fc88ad9c0ede2e19f2292c8e61c03163 (patch) | |
| tree | 4cb8ccec45745eb2dd7062a54dd626ff683a7e72 | |
| parent | Bump version to v1.2.1 (diff) | |
| signature | ||
Don't try to parse non-html/xml stuff with BeautifulSoup
| -rw-r--r-- | src/utils/http.py | 5 |
1 files changed, 4 insertions, 1 deletions
```diff
diff --git a/src/utils/http.py b/src/utils/http.py
index edc09f04..f502301d 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -8,6 +8,7 @@ USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
 REGEX_HTTP = re.compile("https?://", re.I)
 
 RESPONSE_MAX = (1024*1024)*100
+SOUP_CONTENT_TYPES = ["text/html", "text/xml", "application/xml"]
 
 class HTTPException(Exception):
     pass
@@ -60,10 +61,12 @@ def request(url: str, method: str="GET", get_params: dict={},
 
     response_headers = utils.CaseInsensitiveDict(dict(response.headers))
 
-    if soup:
+    content_type = response.headers["Content-Type"].split(";", 1)[0]
+    if soup and content_type in SOUP_CONTENT_TYPES:
         soup = bs4.BeautifulSoup(response_content, parser)
         return Response(response.status_code, soup, response_headers)
 
+
     data = response_content.decode(response.encoding or fallback_encoding)
     if json and data:
         try:
```
