From 01441efe7564e31e465c84377518ec33176fb893 Mon Sep 17 00:00:00 2001 From: jesopo Date: Thu, 6 Feb 2020 11:54:04 +0000 Subject: only pull 'charset' from content-type when it definitely exists --- src/utils/http.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/utils/http.py') diff --git a/src/utils/http.py b/src/utils/http.py index f5595a2a..239ae11a 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -153,7 +153,9 @@ def _find_encoding(headers: typing.Dict[str, str], data: bytes meta_content_type = soup.findAll("meta", {"http-equiv": lambda v: (v or "").lower() == "content-type"}) if meta_content_type: - return _split_content(meta_content_type[0].get("content"))["charset"] + meta_content = _split_content(meta_content_type[0].get("content")) + if "charset" in meta_content: + return meta_content["charset"] doctype = [item for item in soup.contents if isinstance(item, bs4.Doctype)] or None -- cgit v1.3.1-10-gc9f91