Skip to content

Commit

Permalink
Adding debug-mode writing of the final training data
Browse files: browse the repository at this point in the history
  • Loading branch information
zsogitbe committed Nov 28, 2023
1 parent a2f0ec1 commit 39436db
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions Seq2SeqSharp/Corpus/ParallelCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,11 @@ public void PrepareDataSet()
Logger.WriteLine(Logger.Level.debug, $"Start to sort and shuffle data set by length.");

m_sortedIndexedDataSetFilePath = tmpDataSetFilePath + ".sorted";

#if DEBUG
string tmp_sortedIndexedDataSetFilePath = tmpDataSetFilePath + ".sorted.txt";
using (StreamWriter bwt = new StreamWriter(new FileStream(tmp_sortedIndexedDataSetFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 40960000)))
#endif
using (BinaryWriter bw = new BinaryWriter(new FileStream(m_sortedIndexedDataSetFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 40960000)))
using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(tmpDataSetFilePath))
using (MemoryMappedViewStream mms = mmf.CreateViewStream())
Expand Down Expand Up @@ -383,6 +388,12 @@ public void PrepareDataSet()
bw.Write(String.Join("\n", srcLines));
bw.Write(String.Join("\n", tgtLines));

#if DEBUG
bwt.WriteLine(sentSize);
bwt.WriteLine(String.Join("\n", srcLines));
bwt.WriteLine(String.Join("\n", tgtLines));
#endif

m_batchNumInTotal++;
if (m_batchNumInTotal % 10000 == 0)
{
Expand Down

0 comments on commit 39436db

Please sign in to comment.