Skip to content

Commit

Permalink
Truncate overly long sentences while iterating over the indexed data set.
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongkaifu committed Dec 18, 2024
1 parent c54dc5f commit 243f358
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Seq2SeqSharp/Corpus/MonoCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ public IEnumerator<T> GetEnumerator()
}
}

IPair sntPair = new SntPair(tgtLine, tgtLine);
IPair sntPair = new SntPair(tgtLine, tgtLine, maxSrcLength: m_maxTgtTokenSize, maxTgtLength: m_maxTgtTokenSize);
currentTokenCountsInBatch += sntPair.GetTgtTokenCount();
outputs.Add(sntPair);

Expand Down
10 changes: 9 additions & 1 deletion Seq2SeqSharp/Corpus/Seq2SeqCorpusBatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,27 @@ public class SntPair : IPair
public List<string> SrcTokens;
public List<string> TgtTokens;

public SntPair(string srcLine, string tgtLine)
/// <summary>
/// Builds a sentence pair by splitting each line on single spaces and,
/// when a non-negative limit is given, keeping only the leading tokens.
/// </summary>
/// <param name="srcLine">Source sentence; a null or empty string yields an empty token list.</param>
/// <param name="tgtLine">Target sentence; a null or empty string yields an empty token list.</param>
/// <param name="maxSrcLength">Maximum number of source tokens to keep; a negative value (the default) disables truncation.</param>
/// <param name="maxTgtLength">Maximum number of target tokens to keep; a negative value (the default) disables truncation.</param>
public SntPair(string srcLine, string tgtLine, int maxSrcLength = -1, int maxTgtLength = -1)
{
    // Source side: tokenize, then cut down to the first maxSrcLength tokens if the limit is active and exceeded.
    if (String.IsNullOrEmpty(srcLine))
    {
        SrcTokens = new List<string>();
    }
    else
    {
        List<string> srcWords = srcLine.Split(' ').ToList();
        bool srcTooLong = maxSrcLength >= 0 && srcWords.Count > maxSrcLength;
        SrcTokens = srcTooLong ? srcWords.GetRange(0, maxSrcLength) : srcWords;
    }

    // Target side: same treatment, governed by maxTgtLength.
    if (String.IsNullOrEmpty(tgtLine))
    {
        TgtTokens = new List<string>();
    }
    else
    {
        List<string> tgtWords = tgtLine.Split(' ').ToList();
        bool tgtTooLong = maxTgtLength >= 0 && tgtWords.Count > maxTgtLength;
        TgtTokens = tgtTooLong ? tgtWords.GetRange(0, maxTgtLength) : tgtWords;
    }
}

Expand Down

0 comments on commit 243f358

Please sign in to comment.