diff --git a/Seq2SeqSharp/Corpus/ParallelCorpus.cs b/Seq2SeqSharp/Corpus/ParallelCorpus.cs index 90ed49e..ddd93eb 100644 --- a/Seq2SeqSharp/Corpus/ParallelCorpus.cs +++ b/Seq2SeqSharp/Corpus/ParallelCorpus.cs @@ -341,6 +341,11 @@ public void PrepareDataSet() Logger.WriteLine(Logger.Level.debug, $"Start to sort and shuffle data set by length."); m_sortedIndexedDataSetFilePath = tmpDataSetFilePath + ".sorted"; + +#if DEBUG + string tmp_sortedIndexedDataSetFilePath = tmpDataSetFilePath + ".sorted.txt"; + using (StreamWriter bwt = new StreamWriter(new FileStream(tmp_sortedIndexedDataSetFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 40960000))) +#endif using (BinaryWriter bw = new BinaryWriter(new FileStream(m_sortedIndexedDataSetFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 40960000))) using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(tmpDataSetFilePath)) using (MemoryMappedViewStream mms = mmf.CreateViewStream()) @@ -383,6 +388,12 @@ public void PrepareDataSet() bw.Write(String.Join("\n", srcLines)); bw.Write(String.Join("\n", tgtLines)); +#if DEBUG + bwt.WriteLine(sentSize); + bwt.WriteLine(String.Join("\n", srcLines)); + bwt.WriteLine(String.Join("\n", tgtLines)); +#endif + m_batchNumInTotal++; if (m_batchNumInTotal % 10000 == 0) {