Skip to content

Commit

Permalink
Improve the PR
Browse files Browse the repository at this point in the history
  • Loading branch information
InAnYan committed Jul 10, 2024
1 parent 27cb183 commit 3fb4cbb
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv

### Added

- We added an AI chat for linked files.
- We added support for offline extraction of references from PDFs that follow the IEEE format. [#11156](https://github.com/JabRef/jabref/pull/11156)
- We added a new keyboard shortcut <kbd>ctrl</kbd> + <kbd>,</kbd> to open the preferences. [#11154](https://github.com/JabRef/jabref/pull/11154)
- We added value selection (such as for month) for content selectors in custom entry types. [#11109](https://github.com/JabRef/jabref/issues/11109)
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/jabref/gui/JabRefGUI.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jabref.gui;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;

Expand Down Expand Up @@ -315,10 +316,10 @@ public void startBackgroundTasks() {

@Override
public void stop() {
aiService.close();
OOBibBaseConnect.closeOfficeConnection();
stopBackgroundTasks();
shutdownThreadPools();
aiService.close();
}

public void stopBackgroundTasks() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void endIngestingFile(String link, long modificationTimeInSeconds) {
}

public Optional<Long> getIngestedFileModificationTime(String link) {
return Optional.of(ingestedMap.get(link));
return Optional.ofNullable(ingestedMap.get(link));
}

public void registerListener(Object listener) {
Expand Down
31 changes: 23 additions & 8 deletions src/main/java/org/jabref/logic/ai/embeddings/AiIngestor.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import java.util.Optional;
import java.util.concurrent.TimeUnit;

import javafx.beans.property.BooleanProperty;

import org.jabref.logic.ai.AiService;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.logic.xmp.XmpUtilReader;
Expand All @@ -19,6 +21,7 @@
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
Expand All @@ -34,20 +37,26 @@ public class AiIngestor {
private final AiService aiService;

private EmbeddingStoreIngestor ingestor;
private DocumentSplitter documentSplitter;

// Workaround for stopping ingestion early: this flag is checked between document segments while ingesting.
private BooleanProperty shutdownProperty;

public AiIngestor(AiService aiService) {
public AiIngestor(AiService aiService, BooleanProperty shutdownProperty) {
this.aiService = aiService;
this.ingestor = rebuild(aiService);
this.shutdownProperty = shutdownProperty;

rebuild(aiService);

setupListeningToPreferencesChanges();
}

private EmbeddingStoreIngestor rebuild(AiService aiService) {
DocumentSplitter documentSplitter = DocumentSplitters
private void rebuild(AiService aiService) {
this.documentSplitter = DocumentSplitters
.recursive(aiService.getPreferences().getDocumentSplitterChunkSize(),
aiService.getPreferences().getDocumentSplitterOverlapSize());

return EmbeddingStoreIngestor
this.ingestor = EmbeddingStoreIngestor
.builder()
.embeddingStore(aiService.getEmbeddingsManager().getEmbeddingsStore())
.embeddingModel(aiService.getEmbeddingModel().getEmbeddingModel())
Expand All @@ -56,7 +65,7 @@ private EmbeddingStoreIngestor rebuild(AiService aiService) {
}

private void setupListeningToPreferencesChanges() {
aiService.getPreferences().onEmbeddingsParametersChange(() -> ingestor = rebuild(aiService));
aiService.getPreferences().onEmbeddingsParametersChange(() -> rebuild(aiService));
}

/**
Expand Down Expand Up @@ -120,11 +129,17 @@ private void ingestPDFFile(Path path, Metadata metadata) {
}
}

private void ingestString(String string, Metadata metadata) {
private void ingestString(String string, Metadata metadata) throws InterruptedException {
ingestDocument(new Document(string, metadata));
}

private void ingestDocument(Document document) {
ingestor.ingest(document);
for (TextSegment documentPart : documentSplitter.split(document)) {
if (shutdownProperty.get()) {
return;
}

ingestor.ingest(new Document(documentPart.text(), document.metadata()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

import javafx.beans.property.BooleanProperty;
import javafx.beans.property.SimpleBooleanProperty;

import org.jabref.gui.util.BackgroundTask;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.gui.util.UiTaskExecutor;
Expand Down Expand Up @@ -40,28 +43,31 @@ public class EmbeddingsGenerationTask extends BackgroundTask<Void> {
private final List<LinkedFile> linkedFileQueue = Collections.synchronizedList(new ArrayList<>());
private int numOfProcessedFiles = 0;

private final Object lock = new Object();
private AtomicBoolean isRunning = new AtomicBoolean(false);
private AtomicBoolean isBlockingNewTasks = new AtomicBoolean(false);
private final AtomicBoolean isRunning = new AtomicBoolean(false);
private final AtomicBoolean isBlockingNewTasks = new AtomicBoolean(false);

private final BooleanProperty shutdownProperty = new SimpleBooleanProperty(false);

public EmbeddingsGenerationTask(BibDatabaseContext databaseContext, FilePreferences filePreferences, AiService aiService, TaskExecutor taskExecutor) {
this.databaseContext = databaseContext;
this.filePreferences = filePreferences;
this.aiService = aiService;
this.taskExecutor = taskExecutor;

this.aiIngestor = new AiIngestor(aiService);
this.aiIngestor = new AiIngestor(aiService, shutdownProperty);

configure();

setupListeningToPreferencesChanges();
}

private void configure() {
showToUser(true);
willBeRecoveredAutomatically(true);
updateProgress(1, 1);
titleProperty().set(Localization.lang("Embeddings generation"));

this.onFailure(e -> {
throw new RuntimeException(e);
});
}

private void setupListeningToPreferencesChanges() {
Expand All @@ -86,7 +92,6 @@ public void addToStore(LinkedFile linkedFile) {

if (!isRunning.get()) {
this.executeWith(taskExecutor);
showToUser(false);
}
}
}
Expand Down Expand Up @@ -138,6 +143,7 @@ public void updateEmbeddings(BibDatabaseContext bibDatabaseContext) {
@Override
protected Void call() throws Exception {
isRunning.set(true);
showToUser(true);

updateProgress();

Expand All @@ -151,6 +157,7 @@ protected Void call() throws Exception {
}

isRunning.set(false);
showToUser(false);

return null;
}
Expand Down Expand Up @@ -198,6 +205,6 @@ public void updateDatabaseName(String name) {

public void shutdown() {
linkedFileQueue.clear();
// TODO: Stop the AiIngestor.
shutdownProperty.set(true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
import dev.langchain4j.store.embedding.filter.comparison.IsIn;
import jakarta.annotation.Nullable;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -33,6 +34,10 @@

/**
* A custom implementation of langchain4j's {@link EmbeddingStore} that uses a {@link MVStore} as an embedded database.
* <p>
* Every embedding has three fields: a float array (the embedding itself), the file it was generated from, and the
* embedded string (the content). Each of these fields is stored in a separate {@link MVMap}.
* The values that belong to the same embedding are connected by an id, which is a random {@link UUID}.
*/
public class MVStoreEmbeddingStore implements EmbeddingStore<TextSegment> {
public static final String LINKED_FILE_METADATA_KEY = "linkedFile";
Expand Down

0 comments on commit 3fb4cbb

Please sign in to comment.