Skip to content

Commit

Permalink
[kbss-cvut/termit-ui#461] Make text quote selector context length con…
Browse files Browse the repository at this point in the history
…figurable.
  • Loading branch information
ledsoft committed Jun 24, 2024
1 parent cdb6bc4 commit 470b979
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
package cz.cvut.kbss.termit.service.document.html;

import cz.cvut.kbss.termit.model.selector.Selector;
import cz.cvut.kbss.termit.util.Configuration;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -37,8 +37,13 @@
@Service
public class HtmlSelectorGenerators {

private final List<SelectorGenerator> generators = Arrays
.asList(new TextQuoteSelectorGenerator(), new TextPositionSelectorGenerator());
private final List<SelectorGenerator> generators;

public HtmlSelectorGenerators(Configuration config) {
this.generators = List.of(
new TextQuoteSelectorGenerator(config.getTextAnalysis().getTextQuoteSelectorContextLength()),
new TextPositionSelectorGenerator());
}

/**
* Generates selectors for the specified HTML/XML elements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@
*/
class TextQuoteSelectorGenerator implements SelectorGenerator {

/**
* Length of the generated prefix and suffix
*/
static final int CONTEXT_LENGTH = 32;
private final int contextLength;

/**
 * Creates a text quote selector generator.
 *
 * @param contextLength Maximum number of characters kept in the generated prefix and in the generated suffix; must
 *                      not be negative
 * @throws IllegalArgumentException If {@code contextLength} is negative
 */
TextQuoteSelectorGenerator(int contextLength) {
    if (contextLength < 0) {
        // Fail fast: a negative length would push the substring bounds used for prefix/suffix extraction out of
        // range and surface only later as a StringIndexOutOfBoundsException during selector generation
        throw new IllegalArgumentException("Context length must not be negative, but was " + contextLength);
    }
    this.contextLength = contextLength;
}

@Override
public TextQuoteSelector generateSelector(Element... elements) {
Expand All @@ -55,12 +56,12 @@ private Optional<String> extractPrefix(Element start) {
current = current.parent();
final List<Node> previousSiblings = current.childNodes().subList(0, previous.siblingIndex());
sb = extractNodeText(previousSiblings).append(sb);
if (sb.length() >= CONTEXT_LENGTH) {
if (sb.length() >= contextLength) {
break;
}
previous = current;
}
return !sb.isEmpty() ? Optional.of(sb.substring(Math.max(0, sb.length() - CONTEXT_LENGTH))) : Optional.empty();
return !sb.isEmpty() ? Optional.of(sb.substring(Math.max(0, sb.length() - contextLength))) : Optional.empty();
}

private Optional<String> extractSuffix(Element end) {
Expand All @@ -72,11 +73,11 @@ private Optional<String> extractSuffix(Element end) {
final List<Node> previousSiblings = current.childNodes()
.subList(previous.siblingIndex() + 1, current.childNodeSize());
sb.append(extractNodeText(previousSiblings));
if (sb.length() >= CONTEXT_LENGTH) {
if (sb.length() >= contextLength) {
break;
}
previous = current;
}
return !sb.isEmpty() ? Optional.of(sb.substring(0, Math.min(sb.length(), CONTEXT_LENGTH))) : Optional.empty();
return !sb.isEmpty() ? Optional.of(sb.substring(0, Math.min(sb.length(), contextLength))) : Optional.empty();
}
}
21 changes: 18 additions & 3 deletions src/main/java/cz/cvut/kbss/termit/util/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import cz.cvut.kbss.termit.model.acl.AccessLevel;
import jakarta.validation.Valid;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotNull;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Primary;
Expand Down Expand Up @@ -570,13 +571,19 @@ public static class TextAnalysis {
/**
* URL of the text analysis service.
*/
String url;
private String url;

/**
* Score threshold for a term occurrence for it to be saved into the repository.
*/
@NotNull
String termOccurrenceMinScore = Constants.SCORE_THRESHOLD.toString();
private String termOccurrenceMinScore = Constants.SCORE_THRESHOLD.toString();

/**
* Maximum length of the prefix and of the suffix of a text quote selector.
*/
@Min(8)
private int textQuoteSelectorContextLength = 32;

public String getUrl() {
return url;
Expand All @@ -593,6 +600,14 @@ public String getTermOccurrenceMinScore() {
/**
 * Sets the score threshold a term occurrence has to reach in order to be saved into the repository.
 *
 * @param termOccurrenceMinScore Threshold value, kept in its string form; the backing field is declared
 *                               {@code @NotNull}
 */
public void setTermOccurrenceMinScore(String termOccurrenceMinScore) {
this.termOccurrenceMinScore = termOccurrenceMinScore;
}

/**
 * Gets the configured context length of text quote selectors, i.e., the length limit of their prefix and suffix.
 *
 * @return Configured context length; 32 unless a different value has been set
 */
public int getTextQuoteSelectorContextLength() {
return textQuoteSelectorContextLength;
}

/**
 * Sets the context length of text quote selectors, i.e., the length limit of their prefix and suffix.
 * <p>
 * This setter performs no check itself; the backing field carries a {@code @Min(8)} constraint.
 *
 * @param textQuoteSelectorContextLength Context length to use
 */
public void setTextQuoteSelectorContextLength(int textQuoteSelectorContextLength) {
this.textQuoteSelectorContextLength = textQuoteSelectorContextLength;
}
}

@Validated
Expand All @@ -601,7 +616,7 @@ public static class Glossary {
* IRI path to append to vocabulary IRI to get glossary identifier.
*/
@NotNull
String fragment;
private String fragment;

public String getFragment() {
return fragment;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import cz.cvut.kbss.termit.model.selector.Selector;
import cz.cvut.kbss.termit.service.document.html.DummySelectorGenerator;
import cz.cvut.kbss.termit.service.document.html.HtmlSelectorGenerators;
import cz.cvut.kbss.termit.util.Configuration;
import org.aspectj.lang.Aspects;
import org.jsoup.nodes.Element;
import org.springframework.boot.test.context.TestConfiguration;
Expand Down Expand Up @@ -76,8 +77,8 @@ public LocalValidatorFactoryBean validatorFactoryBean() {

@Bean
@Primary
public HtmlSelectorGenerators htmlSelectorGenerators() {
return new HtmlSelectorGenerators() {
public HtmlSelectorGenerators htmlSelectorGenerators(Configuration configuration) {
return new HtmlSelectorGenerators(configuration) {
@Override
public Set<Selector> generateSelectors(Element... elements) {
return Collections.singleton(new DummySelectorGenerator().generateSelector(elements));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class HtmlTermOccurrenceResolverTest {

@SuppressWarnings("unused")
@Spy
private HtmlSelectorGenerators selectorGenerators = new HtmlSelectorGenerators();
private HtmlSelectorGenerators selectorGenerators = new HtmlSelectorGenerators(config);

@Mock
private DocumentManager documentManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,22 @@
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static cz.cvut.kbss.termit.service.document.html.TextQuoteSelectorGenerator.CONTEXT_LENGTH;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;

class TextQuoteSelectorGeneratorTest {

private static final int CONTEXT_LENGTH = 32;

private TextQuoteSelectorGenerator sut;

private Document document;

@BeforeEach
void setUp() {
this.document = new Document("");
this.sut = new TextQuoteSelectorGenerator();
this.sut = new TextQuoteSelectorGenerator(CONTEXT_LENGTH);
}

@Test
Expand Down

0 comments on commit 470b979

Please sign in to comment.