diff --git a/llama_hub/s3/base.py b/llama_hub/s3/base.py index 6a5c1dcf0e..875fbcf8e9 100644 --- a/llama_hub/s3/base.py +++ b/llama_hub/s3/base.py @@ -25,7 +25,7 @@ def __init__( prefix: Optional[str] = "", file_extractor: Optional[Dict[str, Union[str, BaseReader]]] = None, required_exts: Optional[List[str]] = None, - filename_as_id: bool = False, + filename_as_id: bool = True, num_files_limit: Optional[int] = None, file_metadata: Optional[Callable[[str], Dict]] = None, aws_access_id: Optional[str] = None, @@ -152,8 +152,7 @@ def load_data(self, custom_temp_subdir: str = None) -> List[Document]: documents = self.load_s3_files_as_docs(temp_dir) shutil.rmtree(temp_dir) - if not self.filename_as_id: - for doc in documents: - doc.id_ = self.s3_endpoint_url + "_" + doc.metadata["filename"] + for doc in documents: + doc.id_ = self.s3_endpoint_url + "_" + doc.id_ return documents