From df8d2536175fbe77e32178ac790e3e99612a362c Mon Sep 17 00:00:00 2001 From: Lonneke Scheffer Date: Tue, 6 Feb 2024 15:27:24 +0100 Subject: [PATCH] bugfix: metadata.duplicate_count instead of .count in KmerFrequencyEncoder, update docs --- .../receptor/receptor_sequence/SequenceMetadata.py | 5 ++--- immuneML/encodings/kmer_frequency/KmerFrequencyEncoder.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/immuneML/data_model/receptor/receptor_sequence/SequenceMetadata.py b/immuneML/data_model/receptor/receptor_sequence/SequenceMetadata.py index 493490f04..46a86805e 100644 --- a/immuneML/data_model/receptor/receptor_sequence/SequenceMetadata.py +++ b/immuneML/data_model/receptor/receptor_sequence/SequenceMetadata.py @@ -9,12 +9,11 @@ class SequenceMetadata: """ class modeling the existing knowledge about a receptor_sequence, should be stored according to - IMGT gene nomenclature (human can be found `here - `_): + AIRR nomenclature - v call - j call - chain - - count + - duplicate_count - region_type (e.g. IMGT_CDR3, IMGT_CDR1, FULL_SEQUENCE) - frame_type (e.g. IN, OUT, STOP) - sample diff --git a/immuneML/encodings/kmer_frequency/KmerFrequencyEncoder.py b/immuneML/encodings/kmer_frequency/KmerFrequencyEncoder.py index 0df6cbd13..aa5428016 100644 --- a/immuneML/encodings/kmer_frequency/KmerFrequencyEncoder.py +++ b/immuneML/encodings/kmer_frequency/KmerFrequencyEncoder.py @@ -287,7 +287,7 @@ def _encode_sequence(self, sequence: ReceptorSequence, params: EncoderParams, se if self.reads == ReadsType.UNIQUE: counts[i] += 1 elif self.reads == ReadsType.ALL: - counts[i] += sequence.metadata.count + counts[i] += sequence.metadata.duplicate_count return counts def get_additional_files(self) -> List[str]: