diff --git a/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 7173c21..99df1d2 100644 --- a/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,4 +1,5 @@ import os +import re from typing import Any, Optional, Type import requests @@ -67,7 +68,7 @@ def _run( page.encoding = page.apparent_encoding parsed = BeautifulSoup(page.text, "html.parser") - text = parsed.get_text() - text = "\n".join([i for i in text.split("\n") if i.strip() != ""]) - text = " ".join([i for i in text.split(" ") if i.strip() != ""]) + text = parsed.get_text(" ") + text = re.sub('[ \t]+', ' ', text) + text = re.sub('\\s+\n\\s+', '\n', text) return text