diff --git a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs index 78cfe606..a96cc806 100644 --- a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs +++ b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs @@ -120,11 +120,10 @@ private IEnumerable GetRows(IList> listOf { var rangeInfo = new NRangeInfo(N) { - Versification = Corpora[0].Versification, + Versifications = Corpora.Select(c => c.Versification).ToArray(), RowRefComparer = RowRefComparer }; - List[] sameRefRows = new List[Corpora.Count]; bool[] completed = listOfEnumerators.Select(e => !e.MoveNext()).ToArray(); while (!completed.All(c => c)) @@ -151,7 +150,7 @@ private IEnumerable GetRows(IList> listOf var currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray(); IList nonMinRefIndexes = System.Linq.Enumerable.Range(0, N).Except(minRefIndexes).ToList(); - if (minRefIndexes.Count < (N - completed.Count(c => c))) //then there are some non-min refs + if (minRefIndexes.Count < (N - completed.Count(c => c)) || completed.Where(c => !c).Count() == 1) //then there are some non-min refs or only one incomplete enumerator { IList> minEnumerators = minRefIndexes .Select(i => listOfEnumerators[i]) @@ -161,20 +160,14 @@ private IEnumerable GetRows(IList> listOf .ToList(); if ( - minRefIndexes - .Select(i => - !AllRowsList[i] - && minRefIndexes - .Select(j => j != i && !completed[i] && listOfEnumerators[i].Current.IsInRange) - .Any(b => b) - ) - .Any(b => b) + nonMinRefIndexes.Any(i => !AllRowsList[i]) + && minRefIndexes.Where(i => !completed[i] && listOfEnumerators[i].Current.IsInRange).Any() ) { if ( rangeInfo.IsInRange && nonMinEnumerators - .Select(e => e.Current.IsInRange && e.Current.Segment.Count > 0) + .Where(e => e.Current != null && e.Current.IsInRange && e.Current.Segment.Count > 0) .Any() ) { @@ -188,17 +181,23 @@ private IEnumerable GetRows(IList> listOf foreach ( NParallelTextRow row in CreateMinRefRows( rangeInfo, - minEnumerators.Select(e => e.Current).ToList(), - minEnumerators.Where((e, i) => AllRowsList[i]).Select(e => e.Current).ToList(), + currentRows, + minRefIndexes, nonMinRefIndexes, - forceInRange: minEnumerators - .Select(e => e.Current.TextId) - .Union(nonMinEnumerators.Select(e => e.Current.TextId)) - .Distinct() - .Count() == 1 - && nonMinEnumerators - .Select(e => !e.Current.IsRangeStart && e.Current.IsInRange) - .Any() + forceInRange: minRefIndexes + .Select(i => + nonMinEnumerators + .Where(e => e.Current != null) + .Select(e => e.Current.TextId) + .Union(new List { currentRows[i].TextId }) + .Distinct() + .Count() == 1 //TODO clean up + && nonMinEnumerators + .Where(e => e.Current != null) + .Select(e => !e.Current.IsRangeStart && e.Current.IsInRange) + .Any(b => b) + ) + .ToList() ) ) { @@ -219,8 +218,8 @@ NParallelTextRow row in CreateMinRefRows( .Select(i => !AllRowsList[i] && minRefIndexes - .Select(j => j != i && !completed[i] && listOfEnumerators[i].Current.IsInRange) - .Any(b => b) + .Where(j => j != i && !completed[j] && listOfEnumerators[j].Current.IsInRange) + .Any() ) .Any(b => b) ) @@ -238,24 +237,21 @@ NParallelTextRow row in CreateMinRefRows( } else { - for (int i = 0; i < rangeInfo.Rows.Count - 1; i++) + for (int i = 0; i < rangeInfo.Rows.Count; i++) { for (int j = 0; j < rangeInfo.Rows.Count; j++) { - if (j <= i || completed[i] || completed[j]) + if (i == j || completed[i] || completed[j]) continue; if (rangeInfo.CheckSameRefRows(rangeInfo.Rows[i].SameRefRows, currentRows[j])) { foreach (TextRow tr in rangeInfo.Rows[i].SameRefRows) { - foreach ( - NParallelTextRow r in CreateRows( - rangeInfo, - rangeInfo.Rows[i].IsInRange, - new List { tr, currentRows[i] } - ) - ) + var textRows = new TextRow[N]; + textRows[i] = tr; + textRows[j] = currentRows[j]; + foreach (NParallelTextRow r in CreateRows(rangeInfo, textRows)) { yield return r; } @@ -263,13 +259,7 @@ NParallelTextRow r in CreateRows( } } } - foreach ( - NParallelTextRow row in CreateRows( - rangeInfo, - rangeInfo.IsInRange, - currentIncompleteRows - ) - ) + foreach (NParallelTextRow row in CreateRows(rangeInfo, currentIncompleteRows)) { yield return row; } @@ -289,29 +279,28 @@ NParallelTextRow row in CreateRows( } } - if (rangeInfo.IsInRange) //TODO + if (rangeInfo.IsInRange) yield return rangeInfo.CreateRow(); } } - private object[] UnifyVersification(object[] refs) + private object[] UnifyVersification(object[] refs, int i) { - if (Corpora[0].Versification == null || refs.Length == 0) + if (Corpora.Any(c => c.Versification == null) || refs.Length == 0) return refs; return refs.Cast() - .Select(r => r.ChangeVersification(Corpora[0].Versification)) + .Select(r => r.ChangeVersification(Corpora[i].Versification)) .Cast() .ToArray(); } private IEnumerable CreateRows( NRangeInfo rangeInfo, - bool isInRange, IList rows, IList forceInRange = null ) { - if (isInRange) + if (rangeInfo.IsInRange) yield return rangeInfo.CreateRow(); if (rows.All(r => r == null)) @@ -325,14 +314,14 @@ private IEnumerable CreateRows( { if (rows[i] != null) { - textId = textId ?? rows[i].TextId; - refs.Add(UnifyVersification(new object[] { rows[i].Ref })); + textId = textId ?? rows[i]?.TextId; + refs.Add(UnifyVersification(new object[] { rows[i].Ref }, i)); flags.Add(rows[i].Flags); } else { refs.Add(refRefs); - flags.Add(forceInRange == null || !forceInRange[i] ? TextRowFlags.None : TextRowFlags.InRange); + flags.Add(forceInRange != null && forceInRange[i] ? TextRowFlags.InRange : TextRowFlags.None); } } @@ -345,47 +334,52 @@ private IEnumerable CreateRows( private IEnumerable CreateMinRefRows( NRangeInfo rangeInfo, - IList minRefRows, - IList allRowsMinRefRows, + IList currentRows, + IList minRefIndexes, IList nonMinRefIndexes, - bool forceInRange = false + IList forceInRange = null ) { - List sameRefRows = rangeInfo - .Rows.Where((r, i) => nonMinRefIndexes.Contains(i)) - .SelectMany(r => r.SameRefRows) + List<(IList Rows, int Index)> sameRefRowsPerIndex = nonMinRefIndexes + .Select(i => (rangeInfo.Rows[i], i)) + .Select(pair => (pair.Item1.SameRefRows, pair.Item2)) .ToList(); - foreach (TextRow textRow in minRefRows) + List alreadyYielded = new List(); + + foreach (int i in minRefIndexes) { - if (rangeInfo.CheckSameRefRows(sameRefRows, textRow)) + TextRow textRow = currentRows[i]; + foreach ((IList sameRefRows, int j) in sameRefRowsPerIndex) { - foreach (TextRow sameRefRow in sameRefRows) + if (i == j) + continue; + if (rangeInfo.CheckSameRefRows(sameRefRows, textRow)) { - foreach ( - NParallelTextRow row in CreateRows( - rangeInfo, - rangeInfo.IsInRange, - new List() { textRow, sameRefRow }, - forceInRange: new List() { false, forceInRange } - ) - ) + alreadyYielded.Add(i); + foreach (TextRow sameRefRow in sameRefRows) { - yield return row; + var textRows = new TextRow[N]; + textRows[i] = textRow; + textRows[j] = sameRefRow; + foreach ( + NParallelTextRow row in CreateRows(rangeInfo, textRows, forceInRange: forceInRange) + ) + { + yield return row; + } } } } } - foreach (TextRow textRow in allRowsMinRefRows) + foreach (int i in minRefIndexes.Where(i => AllRowsList[i]).Except(alreadyYielded)) { - foreach ( - NParallelTextRow row in CreateRows( - rangeInfo, - textRow.IsInRange, - new List { textRow }, //TODO empty not non-existent - new List { forceInRange } - ) - ) + TextRow textRow = currentRows[i]; + var textRows = new TextRow[N]; + textRows[i] = textRow; + var forceCurrentInRange = new bool[N]; + forceCurrentInRange[i] = forceCurrentInRange[i]; + foreach (NParallelTextRow row in CreateRows(rangeInfo, textRows, forceCurrentInRange)) { yield return row; } @@ -406,7 +400,7 @@ private class NRangeInfo { public int N; public string TextId { get; set; } = ""; - public ScrVers Versification { get; set; } = null; + public ScrVers[] Versifications { get; set; } = null; public IComparer RowRefComparer { get; set; } = null; public List Rows { get; } public bool IsInRange => Rows.Any(r => r.IsInRange); @@ -452,20 +446,27 @@ public void AddTextRow(TextRow row, int index) public NParallelTextRow CreateRow() { - object[] refs = new object[0]; - foreach (RangeRow cRow in Rows) + object[][] refs = new object[N][]; + IList referenceRefs = Rows.Where(r => r.Refs.Count > 0).Select(r => r.Refs).FirstOrDefault(); + foreach (int i in System.Linq.Enumerable.Range(0, Rows.Count)) { - if (refs.Count() == 0 && Versification != null) + var row = Rows[i]; + + if (Versifications.All(v => v != null) && row.Refs.Count() == 0) { - refs = cRow - .Refs.ToArray() + refs[i] = referenceRefs + .ToArray() .Cast() - .Select(r => r.ChangeVersification(Versification)) + .Select(r => r.ChangeVersification(Versifications[i])) .Cast() .ToArray(); } + else + { + refs[i] = row.Refs.ToArray(); + } } - var nParRow = new NParallelTextRow(TextId, Rows.Select(r => r.Refs.ToList()).ToArray()) + var nParRow = new NParallelTextRow(TextId, refs) { NSegments = Rows.Select(r => r.Segment.ToArray()).ToArray(), NFlags = Rows.Select(r => r.IsSentenceStart ? TextRowFlags.SentenceStart : TextRowFlags.None) diff --git a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs index b01b52ed..c120ceaf 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs @@ -1,4 +1,5 @@ -using NUnit.Framework; +using System.Text.Json; +using NUnit.Framework; using SIL.Scripture; namespace SIL.Machine.Corpora; @@ -627,8 +628,6 @@ public void GetRows_MissingText() Assert.That(rows[1].TargetSegment, Is.EqualTo("target segment 3 .".Split())); } - //TODO REMOVE: ABOVE PASS - [Test] public void GetRows_RangeAllTargetRows() { @@ -838,7 +837,7 @@ public void GetGetRows_SameRefLastOneToMany() Assert.That(rows[1].SourceSegment, Is.EqualTo("source segment 2 .".Split())); Assert.That(rows[1].TargetSegment, Is.EqualTo("target segment 2-1 .".Split())); Assert.That(rows[2].SourceRefs, Is.EqualTo(new[] { 2 })); - Assert.That(rows[2].TargetRefs, Is.EqualTo(new[] { 2 })); + Assert.That(rows[2].TargetRefs, Is.EqualTo(new[] { 2 }), JsonSerializer.Serialize(rows)); Assert.That(rows[2].SourceSegment, Is.EqualTo("source segment 2 .".Split())); Assert.That(rows[2].TargetSegment, Is.EqualTo("target segment 2-2 .".Split())); } @@ -1016,8 +1015,6 @@ public void GetGetRows_VerseRefOutOfOrder() ); } - //TODO REMOVE: BELOW PASS - [Test] public void Count_NoRows() {