fix(fetch-node): removed isSoup from default
PeriniM committed May 13, 2024
1 parent 353382b commit 0c15947
Showing 3 changed files with 8 additions and 7 deletions.
examples/openai/search_graph_openai.py (4 changes: 2 additions & 2 deletions)
@@ -17,7 +17,7 @@
 graph_config = {
     "llm": {
         "api_key": openai_key,
-        "model": "gpt-3.5-turbo",
+        "model": "gpt-4-turbo",
     },
     "max_results": 2,
     "verbose": True,
@@ -28,7 +28,7 @@
 # ************************************************
 
 search_graph = SearchGraph(
-    prompt="List me the best escursions near Trento",
+    prompt="List me the heir of the British throne.",
     config=graph_config
 )

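For reference, the search example after this commit reads roughly as follows. This is a sketch assembled from the two hunks above; the os import, the OPENAI_APIKEY environment variable name, and the run() call are assumptions carried over from the repository's other example scripts, not part of this diff.

import os
from scrapegraphai.graphs import SearchGraph

# Assumed setup (not shown in the diff): the examples read the key from the environment.
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4-turbo",  # bumped from gpt-3.5-turbo in this commit
    },
    "max_results": 2,
    "verbose": True,
}

search_graph = SearchGraph(
    prompt="List me the heir of the British throne.",
    config=graph_config,
)

result = search_graph.run()  # assumed entry point, as in the other examples
print(result)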
examples/openai/smart_scraper_openai.py (2 changes: 1 addition & 1 deletion)
@@ -30,7 +30,7 @@
 # ************************************************
 
 smart_scraper_graph = SmartScraperGraph(
-    prompt="List me all the projects with their description.",
+    prompt="List me all the links in the page",
     # also accepts a string with the already downloaded HTML code
     source="https://perinim.github.io/projects/",
     config=graph_config
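Under the same assumptions (a graph_config dict as above and run() as the entry point), the updated scraper example would look roughly like this:

from scrapegraphai.graphs import SmartScraperGraph

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the links in the page",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    config=graph_config,  # assumed defined as in the search example
)

result = smart_scraper_graph.run()  # assumed entry point
print(result)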
scrapegraphai/nodes/fetch_node.py (9 changes: 5 additions & 4 deletions)
@@ -51,7 +51,8 @@ def __init__(
             False if node_config is None else node_config.get("verbose", False)
         )
         self.useSoup = (
-            True if node_config is None else node_config.get("useSoup", True)
+            False if node_config is None else node_config.get("useSoup", False)
         )
         self.loader_kwargs = (
             {} if node_config is None else node_config.get("loader_kwargs", {})
         )
@@ -117,7 +118,7 @@ def execute(self, state):
             pass
 
         elif not source.startswith("http"):
-            compressed_document = [Document(page_content=cleanup_html(source),
+            compressed_document = [Document(page_content=cleanup_html(data, source),
                                             metadata={"source": "local_dir"}
                                             )]

@@ -127,7 +128,7 @@
                 cleanedup_html = cleanup_html(response.text, source)
                 compressed_document = [Document(page_content=cleanedup_html)]
             else:
-                print(f"Failed to retrieve contents from the webpage at url: {url}")
+                print(f"Failed to retrieve contents from the webpage at url: {source}")
 
         else:
             loader_kwargs = {}
@@ -139,7 +140,7 @@
 
             document = loader.load()
             compressed_document = [
-                Document(page_content=cleanup_html(str(document[0].page_content)))
+                Document(page_content=cleanup_html(str(document[0].page_content), source), metadata={"source": source})
             ]
 
         state.update({self.output[0]: compressed_document})
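Net effect of the fetch_node.py changes: the requests-plus-BeautifulSoup path is now opt-in rather than the default, the local and loader branches pass the source through to cleanup_html, and the loader branch tags its Document with the source URL. A minimal sketch of opting back in to the soup path, assuming FetchNode takes input/output keys plus the node_config dict read in the first hunk above (the exact input and output strings are illustrative, not from this diff):

from scrapegraphai.nodes import FetchNode

fetch_node = FetchNode(
    input="url | local_dir",   # illustrative input expression
    output=["doc"],
    node_config={
        "useSoup": True,       # was implicitly True before this commit
        "verbose": False,
        "loader_kwargs": {},   # forwarded to the browser-based loader otherwise
    },
)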
