From e4a5bd01e9e43df2e947d34e5e515002d59ddb30 Mon Sep 17 00:00:00 2001 From: jesopo Date: Tue, 26 Nov 2019 14:34:48 +0000 Subject: explicitly use "lxml" for finding page encoding --- src/utils/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/utils/http.py') diff --git a/src/utils/http.py b/src/utils/http.py index b21d6e70..699c48f1 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -137,7 +137,7 @@ def _meta_content(s: str) -> typing.Dict[str, str]: return out def _find_encoding(data: bytes) -> typing.Optional[str]: - soup = bs4.BeautifulSoup(data) + soup = bs4.BeautifulSoup(data, "lxml") if not soup.meta == None: meta_charset = soup.meta.get("charset") if not meta_charset == None: -- cgit v1.3.1-10-gc9f91