From e48044d36ffda613f65da24641ed8da290195177 Mon Sep 17 00:00:00 2001 From: AmineDiro Date: Fri, 13 Dec 2024 14:17:04 +0100 Subject: [PATCH] feat: send all to megaparse_sdk (#3521) # Description - Send all to megaparse sdk Co-authored-by: aminediro --- core/pyproject.toml | 6 ++---- .../processor/implementations/megaparse_processor.py | 3 +-- core/quivr_core/processor/registry.py | 10 +++++++++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/core/pyproject.toml b/core/pyproject.toml index 2876fdf23539..bf88a89e39c4 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -2,9 +2,7 @@ name = "quivr-core" version = "0.0.26" description = "Quivr core RAG package" -authors = [ - { name = "Stan Girard", email = "stan@quivr.app" } -] +authors = [{ name = "Stan Girard", email = "stan@quivr.app" }] dependencies = [ "pydantic>=2.8.2", "langchain-core>=0.2.38", @@ -23,7 +21,7 @@ dependencies = [ "faiss-cpu>=1.8.0.post1", "rapidfuzz>=3.10.1", "markupsafe>=2.1.5", - "megaparse-sdk==0.1.7" + "megaparse-sdk>=0.1.9", ] readme = "README.md" requires-python = ">= 3.11" diff --git a/core/quivr_core/processor/implementations/megaparse_processor.py b/core/quivr_core/processor/implementations/megaparse_processor.py index 2c46cec105a9..55f4948fc3d2 100644 --- a/core/quivr_core/processor/implementations/megaparse_processor.py +++ b/core/quivr_core/processor/implementations/megaparse_processor.py @@ -31,6 +31,7 @@ class MegaparseProcessor(ProcessorBase): """ supported_extensions = [ + FileExtension.txt, FileExtension.pdf, FileExtension.docx, FileExtension.doc, @@ -42,11 +43,9 @@ class MegaparseProcessor(ProcessorBase): FileExtension.bib, FileExtension.odt, FileExtension.html, - FileExtension.py, FileExtension.markdown, FileExtension.md, FileExtension.mdx, - FileExtension.ipynb, ] def __init__( diff --git a/core/quivr_core/processor/registry.py b/core/quivr_core/processor/registry.py index 4be8af8ca79d..d0e62dff1a15 100644 --- a/core/quivr_core/processor/registry.py +++ b/core/quivr_core/processor/registry.py @@ -124,13 +124,21 @@ def defaults_to_proc_entries( _append_proc_mapping( mapping=base_processors, file_exts=[ + FileExtension.txt, FileExtension.pdf, - FileExtension.xls, FileExtension.docx, + FileExtension.doc, FileExtension.pptx, + FileExtension.xls, + FileExtension.xlsx, + FileExtension.csv, FileExtension.epub, + FileExtension.bib, FileExtension.odt, FileExtension.html, + FileExtension.markdown, + FileExtension.md, + FileExtension.mdx, ], cls_mod="quivr_core.processor.implementations.megaparse_processor.MegaparseProcessor", errtxt=f"can't import MegaparseProcessor. Please install quivr-core[{ext_str}] to access MegaparseProcessor",