From 6766487ca8692297168dba436ebb6c8bd66fc25e Mon Sep 17 00:00:00 2001 From: Damien Daspit <damien_daspit@sil.org> Date: Mon, 15 Jan 2018 16:45:24 +0700 Subject: [PATCH] segment and align USX texts --- .../translate/core/machine.service.ts | 7 +- .../translate/editor/document-editor.ts | 89 +++++------- .../translate/editor/editor.component.html | 4 +- .../translate/editor/editor.component.ts | 16 ++- .../languageforge/translate/editor/segment.ts | 2 +- .../translate/editor/segmenter.ts | 135 ++++++++++++++++++ .../Services/DeltaUsxMapper.cs | 28 +--- tsconfig.json | 2 + 8 files changed, 196 insertions(+), 87 deletions(-) create mode 100644 src/angular-app/languageforge/translate/editor/segmenter.ts diff --git a/src/angular-app/languageforge/translate/core/machine.service.ts b/src/angular-app/languageforge/translate/core/machine.service.ts index 74f3b0bd0b..9e7f831345 100644 --- a/src/angular-app/languageforge/translate/core/machine.service.ts +++ b/src/angular-app/languageforge/translate/core/machine.service.ts @@ -29,12 +29,11 @@ export class MachineService { } } - initialise(projectId: string, isScripture: boolean): void { + initialise(projectId: string): void { this.engine = new TranslationEngine(this.$window.location.origin + '/machine', projectId); this.updateConfidence(); - const segmentType = isScripture ? 'line' : 'latin'; - this.sourceSegmentTokenizer = new SegmentTokenizer(segmentType); - this.targetSegmentTokenizer = new SegmentTokenizer(segmentType); + this.sourceSegmentTokenizer = new SegmentTokenizer('latin'); + this.targetSegmentTokenizer = new SegmentTokenizer('latin'); } translate(sourceSegment: string): angular.IPromise<void> { diff --git a/src/angular-app/languageforge/translate/editor/document-editor.ts b/src/angular-app/languageforge/translate/editor/document-editor.ts index 78b8ad7f95..17e972c629 100644 --- a/src/angular-app/languageforge/translate/editor/document-editor.ts +++ b/src/angular-app/languageforge/translate/editor/document-editor.ts @@ -1,28 +1,27 @@ import * as angular from 'angular'; import Quill, { RangeStatic } from 'quill'; +import { InputSystem } from '../../../bellows/shared/model/input-system.model'; import { DocType, SaveState } from '../core/constants'; import { MachineService } from '../core/machine.service'; import { RealTimeService } from '../core/realtime.service'; import { MetricService } from './metric.service'; import { SuggestionsTheme } from './quill/suggestions-theme'; import { Segment } from './segment'; +import { MachineSegmenter, Segmenter, UsxSegmenter } from './segmenter'; export abstract class DocumentEditor { - static isSelectionCollapsed(selection: RangeStatic): boolean { - return selection != null && selection.length === 0; - } - static isTextEmpty(text: string): boolean { text = text.endsWith('\n') ? text.substr(0, text.length - 1) : text; return text === ''; } modulesConfig: any = {}; - inputSystem: any = {}; + inputSystem: InputSystem = new InputSystem(); protected currentSegment: Segment; - protected segmentRanges: RangeStatic[]; + protected segmenter: Segmenter; + private _isScripture: boolean = false; private documentSetId: string = ''; private readonly _created: angular.IDeferred<boolean>; @@ -31,6 +30,7 @@ export abstract class DocumentEditor { constructor(private readonly $q: angular.IQService, protected readonly machine: MachineService, private readonly realTime: RealTimeService) { this._created = this.$q.defer(); + this.segmenter = new MachineSegmenter(this, machine); } abstract get docType(): string; @@ -57,8 +57,8 @@ export abstract class DocumentEditor { return this.currentSegment == null ? '' : this.currentSegment.documentSetId; } - get currentSegmentIndex(): number { - return this.currentSegment == null ? -1 : this.currentSegment.index; + get currentSegmentRef(): string { + return this.currentSegment == null ? '' : this.currentSegment.ref; } get saveState(): SaveState { @@ -69,11 +69,22 @@ export abstract class DocumentEditor { return DocumentEditor.isTextEmpty(this.quill.getText()); } + get isScripture(): boolean { + return this._isScripture; + } + + set isScripture(value: boolean) { + if (value !== this._isScripture) { + this._isScripture = value; + this.segmenter = value ? new UsxSegmenter(this) : new MachineSegmenter(this, this.machine); + } + } + openDocumentSet(collection: string, documentSetId: string): void { if (this.documentSetId !== documentSetId) { this.documentSetId = documentSetId; this.realTime.createAndSubscribeRichTextDoc(collection, this.docId, this.quill); - this.segmentRanges = null; + this.segmenter.reset(); } } @@ -86,22 +97,18 @@ export abstract class DocumentEditor { } update(textChange: boolean): boolean { - if (this.segmentRanges == null || textChange) { - this.segmentRanges = this.getSegmentRanges(); - } + this.segmenter.update(textChange); + const selection = this.quill.getSelection(); if (selection == null) { return false; } - let segmentIndex = -1; - if (DocumentEditor.isSelectionCollapsed(selection)) { - segmentIndex = this.segmentRanges.findIndex(range => selection.index <= range.index + range.length); - } - if (segmentIndex === -1) { - segmentIndex = this.currentSegment == null ? this.segmentRanges.length - 1 : this.currentSegment.index; + let segmentRef = this.segmenter.getSegmentRef(selection); + if (segmentRef == null) { + segmentRef = this.currentSegment == null ? this.segmenter.lastSegmentRef : this.currentSegment.ref; } - if (this.switchCurrentSegment(segmentIndex)) { + if (this.switchCurrentSegment(segmentRef)) { // the selection has changed to a different segment return true; } else { @@ -111,15 +118,15 @@ export abstract class DocumentEditor { } } - switchCurrentSegment(segmentIndex: number): boolean { + switchCurrentSegment(segmentRef: string): boolean { if (this.currentSegment != null && this.documentSetId === this.currentSegment.documentSetId - && segmentIndex === this.currentSegment.index + && segmentRef === this.currentSegment.ref ) { // the selection has not changed to a different segment return false; } - this.currentSegment = new Segment(this.documentSetId, segmentIndex); + this.currentSegment = new Segment(this.documentSetId, segmentRef); this.updateCurrentSegment(); return true; } @@ -162,32 +169,8 @@ export abstract class DocumentEditor { return this.documentSetId + ':' + this.docType; } - private getSegmentRange(index: number): RangeStatic { - if (this.isTextEmpty) { - return { index: 0, length: 0 }; - } - - const segments = this.getSegmentRanges(); - return index < segments.length ? segments[index] : { index: this.quill.getLength() - 1, length: 0 }; - } - - private getSegmentRanges(): RangeStatic[] { - const text = this.quill.getText().substr(0, this.quill.getLength() - 1); - const segmentRanges = this.machine.tokenizeDocumentText(this.docType, text); - if (segmentRanges.length === 0) { - segmentRanges.push({ index: 0, length: 0 }); - } else { - const lastSegmentRange = segmentRanges[segmentRanges.length - 1]; - const lastSegmentEnd = lastSegmentRange.index + lastSegmentRange.length; - if (lastSegmentEnd < text.length) { - segmentRanges.push({ index: text.length, length: 0 }); - } - } - return segmentRanges; - } - private updateCurrentSegment() { - const range = this.getSegmentRange(this.currentSegment.index); + const range = this.segmenter.getSegmentRange(this.currentSegment.ref); const text = this.quill.getText(range.index, range.length); this.currentSegment.update(text, range); } @@ -247,16 +230,16 @@ export class TargetDocumentEditor extends DocumentEditor { this.updateSuggestions(); } - if (textChange && this.currentSegment != null && this.currentSegment.index === this.segmentRanges.length - 1) { + if (textChange && this.currentSegment != null && this.currentSegment.ref === this.segmenter.lastSegmentRef) { this.updateHighlight(this.currentSegment.range); } return segmentChanged; } - switchCurrentSegment(segmentIndex: number): boolean { + switchCurrentSegment(segmentRef: string): boolean { const previousSegment = this.currentSegment; - const segmentChanged = super.switchCurrentSegment(segmentIndex); + const segmentChanged = super.switchCurrentSegment(segmentRef); if (segmentChanged) { this.trainSegment(previousSegment); } @@ -358,7 +341,7 @@ export class TargetDocumentEditor extends DocumentEditor { return this.machine.trainSegment() .then(() => { segment.acceptChanges(); - this.$window.console.log('Segment ' + segment.index + ' of document ' + segment.documentSetId + this.$window.console.log('Segment ' + segment.ref + ' of document ' + segment.documentSetId + ' was trained successfully.'); }) .finally(() => this.pendingTrainCount--); @@ -428,11 +411,11 @@ export class SourceDocumentEditor extends DocumentEditor { return segmentChanged; } - switchCurrentSegment(segmentIndex: number): boolean { + switchCurrentSegment(segmentRef: string): boolean { if (!this.hasFocus) { this.isCurrentSegmentHighlighted = false; } - const segmentChanged = super.switchCurrentSegment(segmentIndex); + const segmentChanged = super.switchCurrentSegment(segmentRef); if (!this.hasFocus) { this.isCurrentSegmentHighlighted = true; } diff --git a/src/angular-app/languageforge/translate/editor/editor.component.html b/src/angular-app/languageforge/translate/editor/editor.component.html index eb90d81ede..79b9994f6e 100644 --- a/src/angular-app/languageforge/translate/editor/editor.component.html +++ b/src/angular-app/languageforge/translate/editor/editor.component.html @@ -136,7 +136,7 @@ <i class="fa fa-exchange"></i></button> </div> <ng-quill-editor class="notranslate" - id="editorSource" data-modules="$ctrl.source.modulesConfig" data-theme="suggestions" + id="editorSource" spellcheck="false" data-modules="$ctrl.source.modulesConfig" data-theme="suggestions" data-on-editor-created="$ctrl.onQuillCreated(editor, $ctrl.source)" data-on-content-changed="$ctrl.onContentChanged($ctrl.source)" data-on-selection-changed="$ctrl.onSelectionChanged($ctrl.source)" @@ -152,7 +152,7 @@ <label class="editor-label">{{$ctrl.getEditorLabel($ctrl.right)}}</label> </div> <ng-quill-editor class="notranslate" - id="editorTarget" data-modules="$ctrl.target.modulesConfig" data-theme="suggestions" + id="editorTarget" spellcheck="false" data-modules="$ctrl.target.modulesConfig" data-theme="suggestions" data-on-editor-created="$ctrl.onQuillCreated(editor, $ctrl.target)" data-on-content-changed="$ctrl.onContentChanged($ctrl.target)" data-on-selection-changed="$ctrl.onSelectionChanged($ctrl.target)" diff --git a/src/angular-app/languageforge/translate/editor/editor.component.ts b/src/angular-app/languageforge/translate/editor/editor.component.ts index e3c67109f6..4cc339cba1 100644 --- a/src/angular-app/languageforge/translate/editor/editor.component.ts +++ b/src/angular-app/languageforge/translate/editor/editor.component.ts @@ -1,5 +1,4 @@ import * as angular from 'angular'; -import { setTimeout } from 'core-js/library/web/timers'; import { SmtTrainProgress } from 'machine'; import Quill from 'quill'; @@ -141,9 +140,12 @@ export class TranslateEditorController implements angular.IController { new TranslateUserPreferences(); this.source.inputSystem = this.tecProject.config.source.inputSystem; this.target.inputSystem = this.tecProject.config.target.inputSystem; - this.machine.initialise(this.tecProject.slug, this.tecProject.config.isTranslationDataScripture); + this.machine.initialise(this.tecProject.slug); this.showFormats = this.tecProject.config.userPreferences.isFormattingOptionsShown; + this.source.isScripture = this.tecProject.config.isTranslationDataScripture; + this.target.isScripture = this.tecProject.config.isTranslationDataScripture; + if (this.tecProject.config.documentSets.idsOrdered != null && this.tecProject.config.documentSets.idsOrdered.length > 0 ) { @@ -204,7 +206,11 @@ export class TranslateEditorController implements angular.IController { } }); - this.machine.initialise(this.tecProject.slug, this.tecProject.config.isTranslationDataScripture); + this.machine.initialise(this.tecProject.slug); + + this.source.isScripture = this.tecProject.config.isTranslationDataScripture; + this.target.isScripture = this.tecProject.config.isTranslationDataScripture; + this.listenForTrainingStatus(); } } @@ -600,7 +606,7 @@ export class TranslateEditorController implements angular.IController { if (segmentChanged) { // select the corresponding source segment - this.source.switchCurrentSegment(this.target.currentSegmentIndex); + this.source.switchCurrentSegment(this.target.currentSegmentRef); if (this.currentDocType) { this.metricService.sendMetrics(true, this.target.currentSegmentDocumentSetId); @@ -625,7 +631,7 @@ export class TranslateEditorController implements angular.IController { case DocType.SOURCE: if (segmentChanged) { - this.target.switchCurrentSegment(this.source.currentSegmentIndex); + this.target.switchCurrentSegment(this.source.currentSegmentRef); if (!this.currentDocType && this.selectedDocumentSetIndex in this.documentSets) { this.metricService.currentDocumentSetId = this.documentSets[this.selectedDocumentSetIndex].id; diff --git a/src/angular-app/languageforge/translate/editor/segment.ts b/src/angular-app/languageforge/translate/editor/segment.ts index 4d49737490..59737daaae 100644 --- a/src/angular-app/languageforge/translate/editor/segment.ts +++ b/src/angular-app/languageforge/translate/editor/segment.ts @@ -5,7 +5,7 @@ export class Segment { private _range: RangeStatic; private initialText: string; - constructor(public readonly documentSetId: string, public readonly index: number) { } + constructor(public readonly documentSetId: string, public readonly ref: string) { } get text(): string { return this._text; diff --git a/src/angular-app/languageforge/translate/editor/segmenter.ts b/src/angular-app/languageforge/translate/editor/segmenter.ts new file mode 100644 index 0000000000..0bd25d8e20 --- /dev/null +++ b/src/angular-app/languageforge/translate/editor/segmenter.ts @@ -0,0 +1,135 @@ +import { RangeStatic } from 'quill'; +import { MachineService } from '../core/machine.service'; +import { DocumentEditor } from './document-editor'; + +export abstract class Segmenter { + protected readonly segments: Map<string, RangeStatic> = new Map<string, RangeStatic>(); + protected _lastSegmentRef: string = ''; + + constructor(protected readonly doc: DocumentEditor) { } + + get lastSegmentRef(): string { + return this._lastSegmentRef; + } + + update(textChange: boolean): void { + if (this._lastSegmentRef === '' || textChange) { + this.updateSegments(); + } + } + + reset(): void { + this.segments.clear(); + this._lastSegmentRef = ''; + } + + getSegmentRange(ref: string): RangeStatic { + if (this.doc.isTextEmpty) { + return { index: 0, length: 0 }; + } + + return this.segments.has(ref) ? this.segments.get(ref) : { index: this.doc.quill.getLength() - 1, length: 0 }; + } + + getSegmentRef(range: RangeStatic): string { + let segmentRef: string; + if (range != null && range.length === 0) { + for (const [ref, segmentRange] of this.segments) { + if (range.index <= segmentRange.index + segmentRange.length) { + segmentRef = ref; + break; + } + } + } + return segmentRef; + } + + protected abstract updateSegments(): void; +} + +export class MachineSegmenter extends Segmenter { + constructor(doc: DocumentEditor, private readonly machine: MachineService) { + super(doc); + } + + protected updateSegments(): void { + const text = this.doc.quill.getText().substr(0, this.doc.quill.getLength() - 1); + const segmentRanges = this.machine.tokenizeDocumentText(this.doc.docType, text); + if (segmentRanges.length === 0) { + segmentRanges.push({ index: 0, length: 0 }); + } else { + const lastSegmentRange = segmentRanges[segmentRanges.length - 1]; + const lastSegmentEnd = lastSegmentRange.index + lastSegmentRange.length; + if (lastSegmentEnd < text.length) { + segmentRanges.push({ index: text.length, length: 0 }); + } + } + this.reset(); + for (let i = 0; i < segmentRanges.length; i++) { + this.segments.set(i.toString(), segmentRanges[i]); + } + this._lastSegmentRef = (segmentRanges.length - 1).toString(); + } +} + +export class UsxSegmenter extends Segmenter { + protected updateSegments(): void { + const delta = this.doc.quill.getContents(); + this.reset(); + const nextStyleIds = new Map<string, number>(); + let chapter = ''; + let verse = ''; + let curIndex = 0; + let curRangeLen = 0; + delta.forEach(op => { + const len = typeof op.insert === 'string' ? op.insert.length : 1; + if (op.attributes == null) { + curRangeLen += len; + } else { + if (op.attributes.para != null) { + const style = op.attributes.para.style as string; + let nextId = nextStyleIds.get(style); + if (nextId == null) { + nextId = 0; + nextStyleIds.set(style, nextId); + } + + if (curRangeLen > 0) { + this._lastSegmentRef = style + '_' + nextId; + this.segments.set(this._lastSegmentRef, { index: curIndex, length: curRangeLen }); + } + curIndex += curRangeLen + len; + curRangeLen = 0; + nextId++; + nextStyleIds.set(style, nextId); + } else if (op.attributes.chapter != null) { + chapter = op.attributes.chapter.number as string; + verse = ''; + this._lastSegmentRef = 'chapter_' + chapter; + this.segments.set(this._lastSegmentRef, { index: curIndex, length: curRangeLen }); + curIndex += curRangeLen + len; + curRangeLen = 0; + } else if (op.attributes.verse != null) { + if (verse !== '') { + const verseText = this.doc.quill.getText(curIndex, curRangeLen); + let verseRangeLen = curRangeLen; + for (let i = verseText.length - 1; i >= 0 && UsxSegmenter.isWhitespace(verseText[i]); i--) { + verseRangeLen--; + } + this._lastSegmentRef = 'verse_' + chapter + ':' + verse; + this.segments.set(this._lastSegmentRef, { index: curIndex, length: verseRangeLen }); + } + curIndex += curRangeLen; + curRangeLen = len; + verse = op.attributes.verse.number as string; + } else { + curRangeLen += len; + } + } + }); + } + + private static isWhitespace(char: string): boolean { + return /\s/.test(char); + } +} diff --git a/src/netcore-api/SIL.XForge.WebApi.Server/Services/DeltaUsxMapper.cs b/src/netcore-api/SIL.XForge.WebApi.Server/Services/DeltaUsxMapper.cs index c53bd2819e..04d9caf89f 100644 --- a/src/netcore-api/SIL.XForge.WebApi.Server/Services/DeltaUsxMapper.cs +++ b/src/netcore-api/SIL.XForge.WebApi.Server/Services/DeltaUsxMapper.cs @@ -14,26 +14,25 @@ public static Delta ToDelta(XElement usxElem) { var newDelta = new Delta(); int nextNoteId = 1; - bool inVerse = false; foreach (XElement elem in usxElem.Elements()) { switch (elem.Name.LocalName) { case "para": - ProcessChildNodes(newDelta, elem, ref inVerse, ref nextNoteId); + ProcessChildNodes(newDelta, elem, ref nextNoteId); newDelta.Insert("\n", OpAttributes("para", elem)); break; case "chapter": newDelta.Insert((string) elem.Attribute("number")); newDelta.Insert("\n", OpAttributes("chapter", elem)); - inVerse = false; break; } } + newDelta.Insert("\n"); return newDelta; } - private static void ProcessChildNodes(Delta newDelta, XElement elem, ref bool inVerse, ref int nextNoteId, + private static void ProcessChildNodes(Delta newDelta, XElement elem, ref int nextNoteId, JObject parentAttrs = null) { foreach (XNode node in elem.Nodes()) @@ -44,19 +43,7 @@ private static void ProcessChildNodes(Delta newDelta, XElement elem, ref bool in switch (e.Name.LocalName) { case "verse": - if (inVerse) - { - JToken lastOp = newDelta.Ops[newDelta.Ops.Count - 1]; - var text = (string) lastOp[Delta.InsertType]; - if (text == null || text != "\n") - { - newDelta.Insert("\n", new JObject( - new JProperty("para", new JObject( - new JProperty("verse-alignment", ""))))); - } - } newDelta.Insert((string) e.Attribute("number"), OpAttributes("verse", e, parentAttrs)); - inVerse = true; break; case "char": @@ -67,7 +54,7 @@ private static void ProcessChildNodes(Delta newDelta, XElement elem, ref bool in string noteId = $"_note_{nextNoteId}"; nextNoteId++; JObject noteAttrs = OpAttributes("note", e, parentAttrs, noteId); - ProcessChildNodes(newDelta, e, ref inVerse, ref nextNoteId, noteAttrs); + ProcessChildNodes(newDelta, e, ref nextNoteId, noteAttrs); break; } break; @@ -119,11 +106,8 @@ public static XElement ToUsx(string bookId, string desc, Delta delta) switch (prop.Name) { case "para": - if (prop.Value["verse-alignment"] == null) - { - newUsxElem.Add(CreateContainerElement("para", prop.Value, childNodes)); - childNodes.Clear(); - } + newUsxElem.Add(CreateContainerElement("para", prop.Value, childNodes)); + childNodes.Clear(); break; case "chapter": diff --git a/tsconfig.json b/tsconfig.json index 71e404687b..da30e09fef 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,8 +10,10 @@ "compileOnSave": false, "compilerOptions": { "alwaysStrict": true, + "downlevelIteration": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, + "importHelpers": true, "lib": ["es7", "dom"], "module": "es6", "moduleResolution": "node",