diff options
| author | 2019-09-09 14:39:19 +0100 | |
|---|---|---|
| committer | 2019-09-09 14:39:19 +0100 | |
| commit | 0a67659637cc878b8e486cdcdefa0a9fba655319 (patch) | |
| tree | 4bcf7d32d9ae2e9ed627906b1479bb74024e8321 /src/utils | |
| parent | add !8 as alias of !8ball (diff) | |
| signature | ||
only look for <meta>-related tags when there are meta tags
Diffstat (limited to 'src/utils')
| -rw-r--r-- | src/utils/http.py | 20 |
1 files changed, 11 insertions, 9 deletions
```diff
diff --git a/src/utils/http.py b/src/utils/http.py
index 9cfc70a1..232b1c6b 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -66,19 +66,21 @@ def _meta_content(s: str) -> typing.Dict[str, str]:
     return out
 
 def _find_encoding(soup: bs4.BeautifulSoup) -> typing.Optional[str]:
-    meta_charset = soup.meta.get("charset")
-    if not meta_charset == None:
-        return meta_charset
-    else:
+    if not soup.meta == None:
+        meta_charset = soup.meta.get("charset")
+        if not meta_charset == None:
+            return meta_charset
+
         meta_content_type = soup.findAll("meta",
             {"http-equiv": lambda v: (v or "").lower() == "content-type"})
         if meta_content_type:
             return _meta_content(meta_content_type[0].get("content"))["charset"]
-        else:
-            doctype = [item for item in soup.contents if isinstance(item,
-                bs4.Doctype)] or None
-            if doctype and doctype[0] == "html":
-                return "utf8"
+
+    doctype = [item for item in soup.contents if isinstance(item,
+        bs4.Doctype)] or None
+    if doctype and doctype[0] == "html":
+        return "utf8"
+
     return None
 
 def request(url: str, method: str="GET", get_params: dict={},
```
