From df38d7a57f6b809de4808ed601bfc747aef1c79c Mon Sep 17 00:00:00 2001
From: jesopo
Date: Thu, 13 Feb 2020 21:50:33 +0000
Subject: replace lxml usage with html5lib! the future is cool

---
 src/utils/http.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/utils/http.py')

diff --git a/src/utils/http.py b/src/utils/http.py
index 239ae11a..9f25b315 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -127,7 +127,7 @@ class Response(object):
         return self.data.decode(encoding or self.encoding)
     def json(self) -> typing.Any:
         return _json.loads(self.data)
-    def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
+    def soup(self, parser: str="html5lib") -> bs4.BeautifulSoup:
         return bs4.BeautifulSoup(self.decode(), parser)
 
 def _split_content(s: str) -> typing.Dict[str, str]:
@@ -144,7 +144,7 @@ def _find_encoding(headers: typing.Dict[str, str], data: bytes
         if "charset" in content_header:
             return content_header["charset"]
 
-    soup = bs4.BeautifulSoup(data, "lxml")
+    soup = bs4.BeautifulSoup(data, "html5lib")
     if not soup.meta == None:
         meta_charset = soup.meta.get("charset")
         if not meta_charset == None:
@@ -275,7 +275,7 @@ class Client(object):
     request_many = request_many
 
 def strip_html(s: str) -> str:
-    return bs4.BeautifulSoup(s, "lxml").get_text()
+    return bs4.BeautifulSoup(s, "html5lib").get_text()
 
 def resolve_hostname(hostname: str) -> typing.List[str]:
     try:
-- 
cgit v1.3.1-10-gc9f91