diff --git a/taranis/__main__.py b/taranis/__main__.py
index 880a361..a3011a5 100644
--- a/taranis/__main__.py
+++ b/taranis/__main__.py
@@ -450,6 +450,24 @@ def reference_alleles(
     default=False,
     help="Create alignment files",
 )
+@click.option(
+    "-q",
+    "--proteine-threshold",
+    required=False,
+    nargs=1,
+    default=80,
+    type=int,
+    help="Threshold of protein coverage to consider as TPR. default 90",
+)
+@click.option(
+    "-i",
+    "--increase-sequence",
+    required=False,
+    nargs=1,
+    default=20,
+    type=int,
+    help="Increase the number of triplet sequences to find the stop codon. default 20",
+)
 @click.option(
     "--cpus",
     required=False,
@@ -469,6 +487,8 @@ def allele_calling(
     force: bool,
     snp: bool,
     alignment: bool,
+    proteine_threshold: int,
+    increase_sequence: int,
     cpus: int,
 ):
     _ = taranis.utils.check_additional_programs_installed(
@@ -515,6 +535,8 @@ def allele_calling(
                 inf_allele_obj,
                 snp,
                 alignment,
+                proteine_threshold,
+                increase_sequence,
             )
             for assembly_file in assemblies
         ]
@@ -525,23 +547,27 @@ def allele_calling(
                 print(e)
                 continue
     """
-    results.append(
-        taranis.allele_calling.parallel_execution(
-            assemblies[0],
-            schema,
-            prediction_data,
-            schema_ref_files,
-            threshold,
-            perc_identity,
-            output,
-            inf_allele_obj,
-            snp,
-            alignment,
+    for assembly_file in assemblies:
+        results.append(
+            taranis.allele_calling.parallel_execution(
+                assembly_file,
+                schema,
+                prediction_data,
+                schema_ref_files,
+                threshold,
+                perc_identity,
+                output,
+                inf_allele_obj,
+                snp,
+                alignment,
+                proteine_threshold,
+                increase_sequence,
+            )
         )
-    )
     _ = taranis.allele_calling.collect_data(
         results, output, snp, alignment, schema_ref_files
     )
     finish = time.perf_counter()
     print(f"Allele calling finish in {round((finish-start)/60, 2)} minutes")
+    log.info("Allele calling finish in %s minutes", round((finish-start)/60, 2))
     # sample_allele_obj.analyze_sample()
diff --git a/taranis/allele_calling.py b/taranis/allele_calling.py
index c233740..cb886c3 100644
--- a/taranis/allele_calling.py
+++ b/taranis/allele_calling.py
@@ -8,6 +8,7 @@
 
 from collections import OrderedDict
 from pathlib import Path
+from Bio.Seq import Seq
 from Bio import SeqIO
 from io import StringIO
 
@@ -35,6 +36,8 @@ def __init__(
         inf_alle_obj: object,
         snp_request: bool = False,
         aligment_request: bool = False,
+        tpr_limit: int = 80,
+        increase_sequence: int = 20,
     ):
         """Allele calling initial creation object
 
@@ -48,9 +51,12 @@ def __init__(
             inf_alle_obj (object): object to infer alleles
             snp_request (bool, optional): snp saved to file. Defaults to False.
             aligment_request (bool, optional): allignment saved to file. Defaults to False.
+            tpr_limit (int, optional): lower threshold to consider trunked proteine. Defaults to 80.
+            increase_sequence (int, optional): increase sequence to be analysed. Defaults to 20.
         """
         self.prediction_data = annotation  # store prediction annotation
         self.sample_file = sample_file
+        self.sample_records = taranis.utils.read_fasta_file(self.sample_file, convert_to_dict=True)
         self.schema = schema
         self.ref_alleles = reference_alleles
         self.threshold = threshold
@@ -65,9 +71,11 @@ def __init__(
         self.inf_alle_obj = inf_alle_obj
         self.snp_request = snp_request
         self.aligment_request = aligment_request
+        self.tpr_limit = tpr_limit / 100
+        self.increase_sequence = increase_sequence * 3
 
     def assign_allele_type(
-        self, blast_results: list, allele_file: str, allele_name: str
+        self, blast_results: list, allele_file: str, allele_name: str, ref_allele_seq: str
     ) -> list:
         """Assign allele type to the allele
 
@@ -75,12 +83,16 @@ def assign_allele_type(
             blast_result (list): information collected by running blast
             allele_file (str): file name with allele sequence
             allele_name (str): allele name
+            ref_allele_seq (str): reference allele sequence
 
         Returns:
             list: containing allele classification, allele match id, and allele
             details
         """
 
+        def add_sequences_(column_blast_res: list) -> bool:
+            pass
+
         def check_if_plot(column_blast_res: list) -> bool:
             """Check if allele is partial length
 
@@ -91,12 +103,12 @@ def check_if_plot(column_blast_res: list) -> bool:
                 bool: True if allele is partial length
             """
             if (
-                column_blast_res[9] == "1"  # check  at contig start
+                column_blast_res[8] == "1"  # check  at contig start
                 # check if contig ends is the same as match allele ends
-                or column_blast_res[15] == column_blast_res[10]
-                or column_blast_res[10] == "1"  # check reverse at contig end
+                or column_blast_res[9] == column_blast_res[7]
+                or column_blast_res[9] == "1"  # check reverse at contig end
                 # check if contig start is the same as match allele start reverse
-                or column_blast_res[9] == column_blast_res[15]
+                or column_blast_res[8] == column_blast_res[7]
             ):
                 return True
             return False
@@ -119,7 +131,83 @@ def discard_low_threshold_results(blast_results: list) -> list:
                     valid_blast_result.append(b_result)
             return valid_blast_result
 
-        def get_blast_details(blast_result: str, allele_name: str) -> list:
+        def extend_sequence_for_finding_stop_codon(split_blast_result: list) -> list:
+            """Extend match sequence, according the (increase_sequence) for
+                trying find the stop codon.
+
+            Args:
+                split_blast_result (list): list having the informaction collected
+                by running blast
+
+            Returns:
+                list: updated information if stop codon is found
+            """
+            # collect data for checking PLOT
+            data_for_plot = [""] * 10
+            # cop the contig length
+            data_for_plot[7] = split_blast_result[15]
+            # copy start position
+            data_for_plot[8] = split_blast_result[9]
+            # copy end position
+            data_for_plot[9] = split_blast_result[10]
+            # check if PLOT
+            if not check_if_plot(data_for_plot):
+                # fetch the sequence until the last triplet is stop codon
+                contig_seq = self.sample_records[split_blast_result[1]]
+                start_seq = int(split_blast_result[9])
+                stop_seq = int(split_blast_result[10])
+                if stop_seq > start_seq:
+                    # sequence direction is forward
+                    direction = "forward"
+                    # adjust the sequence to be a triplet
+                    interval = (stop_seq - start_seq) // 3 * 3
+                    new_stop_seq = start_seq + interval + self.increase_sequence
+                    start_seq -= 1
+                    # if the increased length is higher than the contig length
+                    # adjust the stop sequence to maximun contig length
+                    # multiply by 3.
+                    if stop_seq > len(contig_seq):
+                        stop_seq = len(contig_seq) // 3 * 3
+                    else:
+                        stop_seq = new_stop_seq -1
+                    c_sequence = contig_seq[start_seq:stop_seq]
+                else:
+                    # sequence direction is reverse
+                    direction = "reverse"
+                    # adjust the sequence to be a triplet
+                    interval = (start_seq - stop_seq) // 3 * 3
+                    new_stop_seq = start_seq - interval - self.increase_sequence
+                    # if the increased length is lower than 0 (contig start)
+                    # position, adjust the start sequence to minumum contig
+                    # length multiply by 3
+                    if new_stop_seq < 0:
+                        # get the minimum contig length that is multiple by 3
+                        stop_seq = stop_seq % 3 - 1
+                    else:
+                        stop_seq = new_stop_seq
+                    # get the sequence in reverse
+                    c_sequence = str(Seq(contig_seq[stop_seq:start_seq]).reverse_complement())
+                new_prot_conv_result = taranis.utils.convert_to_protein(c_sequence, force_coding=False, check_additional_bases=False)
+                # check if stop codon is found in protein sequence
+                # pdb.set_trace()
+                if "protein" in new_prot_conv_result and  "*" in new_prot_conv_result["protein"]:
+                    new_seq_length = new_prot_conv_result["protein"].index("*") * 3
+                    match_sequence = c_sequence[:new_seq_length]
+                    split_blast_result[4] = str(new_seq_length)
+                    prot_error_result = "-"
+                    # update the start and stop position
+
+                    if direction == "forward":
+                        # pdb.set_trace()
+                        split_blast_result[10] = str(int(split_blast_result[9]) + new_seq_length)
+                    else:
+                        split_blast_result[9] = str(int(split_blast_result[10]) - new_seq_length)
+                # ignore the previous process if stop codon is not found 
+
+                # pdb.set_trace()
+            return split_blast_result
+
+        def get_blast_details(blast_result: str, allele_name: str, ref_allele_seq) -> list:
             """Collect blast details and modify the order of the columns
 
             Args:
@@ -144,6 +232,8 @@ def get_blast_details(blast_result: str, allele_name: str) -> list:
                     blast_details[13] = allele quality
                     blast_details[14] = match sequence in contig
                     blast_details[15] = reference allele sequence
+                    blast_details[16] = protein conversion result
+                    blast_details[17] = predicted protein sequence
             """
             split_blast_result = blast_result.split("\t")
             match_allele_name = split_blast_result[0]
@@ -163,7 +253,21 @@ def get_blast_details(blast_result: str, allele_name: str) -> list:
                 direction = "-"
             # remove the gaps in sequences
             match_sequence = split_blast_result[13].replace("-", "")
-            reference_sequence = split_blast_result[14].replace("-", "")
+            # check if the sequence is coding
+            prot_conv_result = taranis.utils.convert_to_protein(match_sequence, force_coding=False, check_additional_bases=True)
+            prot_error_result = prot_conv_result["error"] if "error" in prot_conv_result else "-"
+            predicted_prot_seq = prot_conv_result["protein"] if "protein" in prot_conv_result else "-"
+            # remove if additional sequenced are added at the end of the stop codon
+            pdb.set_trace()
+            if "additional bases added after stop codon" in prot_error_result:
+                new_seq_len = len(match_sequence) // 3 * 3
+                match_sequence = match_sequence[:new_seq_len]
+                split_blast_result[4] = str(new_seq_len)
+            # add more sequence to find the stop codon
+            elif "Last sequence is not a stop codon" in prot_error_result:
+                split_blast_result = extend_sequence_for_finding_stop_codon(split_blast_result)
+            elif "Sequence does not have a start codon" in prot_error_result:
+                pass
             # get blast details
             blast_details = [
                 self.s_name,  # sample name
@@ -181,8 +285,11 @@ def get_blast_details(blast_result: str, allele_name: str) -> list:
                 product_annotation,
                 allele_quality,
                 match_sequence,  # match sequence in contig
-                reference_sequence,  # reference allele sequence
+                ref_allele_seq,  # reference allele sequence
+                prot_error_result,  # protein conversion result
+                predicted_prot_seq,  # predicted protein sequence
             ]
+
             return blast_details
 
         def find_match_allele_schema(allele_file: str, match_sequence: str) -> str:
@@ -196,7 +303,7 @@ def find_match_allele_schema(allele_file: str, match_sequence: str) -> str:
                 str: allele name in the schema that match the sequence
             """
             grep_result = taranis.utils.grep_execution(
-                allele_file, match_sequence, "-b1"
+                allele_file, match_sequence, "-xb1"
             )
             if len(grep_result) > 0:
                 return grep_result[0].split("_")[1]
@@ -206,13 +313,13 @@ def find_match_allele_schema(allele_file: str, match_sequence: str) -> str:
         match_allele_schema = ""
         if len(valid_blast_results) == 0:
             # no match results labelled as LNF. details data filled with empty data
-            return ["LNF", "-", ["-," * 15]]
+            return ["LNF", "-", ["-"]* 18]
         if len(valid_blast_results) > 1:
             # could  be NIPHEM or NIPH
             b_split_data = []
             match_allele_seq = []
             for valid_blast_result in valid_blast_results:
-                multi_allele_data = get_blast_details(valid_blast_result, allele_name)
+                multi_allele_data = get_blast_details(valid_blast_result, allele_name, ref_allele_seq)
                 # get match allele sequence
                 match_allele_seq.append(multi_allele_data[14])
                 b_split_data.append(multi_allele_data)
@@ -232,29 +339,33 @@ def find_match_allele_schema(allele_file: str, match_sequence: str) -> str:
             for (idx,) in range(len(b_split_data)):
                 b_split_data[idx][4] = classification + "_" + match_allele_schema
         else:
-            b_split_data = get_blast_details(valid_blast_results[0], allele_name)
+            b_split_data = get_blast_details(valid_blast_results[0], allele_name, ref_allele_seq)
             # found the allele in schema with the match sequence in the contig
             match_allele_schema = find_match_allele_schema(
-                allele_file, b_split_data[14]
+                allele_file, b_split_data[14] 
             )
-            # PLOT, ASM, ALM, INF, EXC are possible classifications
-            if check_if_plot(b_split_data):
+            # PLOT, TPR, ASM, ALM, INF, EXC are possible classifications
+            if match_allele_schema != "":
+                # exact match found labelled as EXC
+                classification = "EXC"
+            elif check_if_plot(b_split_data):
                 # match allele is partial length labelled as PLOT
                 classification = "PLOT"
-
-            # check if match allele is shorter than reference allele
-            elif int(b_split_data[5]) < int(b_split_data[6]):
+            # check if protein length divided by the length of triplet matched
+            # sequence is lower the the tpr limit
+            elif b_split_data[16] == "Multiple stop codons" and b_split_data[17].index("*") / (int(b_split_data[6]) / 3) < self.tpr_limit:
+                # labelled as TPR
+                classification = "TPR"
+                # check if match allele is shorter than reference allele
+            elif int(b_split_data[6]) < int(b_split_data[5]):
                 classification = "ASM"
             # check if match allele is longer than reference allele
-            elif int(b_split_data[5]) > int(b_split_data[6]):
+            elif int(b_split_data[6]) > int(b_split_data[5]):
                 classification = "ALM"
             else:
                 # if sequence was not found after running grep labelled as INF
-                if match_allele_schema == "":
-                    classification = "INF"
-                else:
-                    # exact match found labelled as EXC
-                    classification = "EXC"
+                classification = "INF"
+       
             # assign an identification value to the new allele
             if match_allele_schema == "":
                 match_allele_schema = str(
@@ -289,11 +400,13 @@ def search_match_allele(self):
                 len(self.ref_alleles),
             )
 
-            alleles = OrderedDict()
+            alleles = taranis.utils.read_fasta_file(ref_allele, convert_to_dict=True)
             match_found = False
+            """ 
             with open(ref_allele, "r") as fh:
                 for record in SeqIO.parse(fh, "fasta"):
                     alleles[record.id] = str(record.seq)
+            """
             count_2 = 0
             for r_id, r_seq in alleles.items():
                 count_2 += 1
@@ -321,7 +434,7 @@ def search_match_allele(self):
                     result["allele_type"][allele_name],
                     result["allele_match"][allele_name],
                     result["allele_details"][allele_name],
-                ) = self.assign_allele_type(blast_result, allele_file, allele_name)
+                ) = self.assign_allele_type(blast_result, allele_file, allele_name, r_seq)
             else:
                 # Sample does not have a reference allele to be matched
                 # Keep LNF info
@@ -330,12 +443,16 @@ def search_match_allele(self):
                 result["allele_details"][allele_name] = "LNF"
 
             # prepare the data for snp and alignment analysis
-            ref_allele_seq = result["allele_details"][allele_name][15]
+            try:
+                ref_allele_seq = result["allele_details"][allele_name][15]
+            except:
+                pdb.set_trace()
             allele_seq = result["allele_details"][allele_name][14]
             ref_allele_name = result["allele_details"][allele_name][3]
 
             if self.snp_request and result["allele_type"][allele_name] != "LNF":
                 # run snp analysis
+                print(allele_name)
                 result["snp_data"][allele_name] = taranis.utils.get_snp_information(
                     ref_allele_seq, allele_seq, ref_allele_name
                 )
@@ -346,6 +463,8 @@ def search_match_allele(self):
                 ] = taranis.utils.get_alignment_data(
                     ref_allele_seq, allele_seq, ref_allele_name
                 )
+        # delete blast folder
+        _ = taranis.utils.delete_folder(self.blast_dir)
         return result
 
 
@@ -360,6 +479,8 @@ def parallel_execution(
     inf_alle_obj: object,
     snp_request: bool = False,
     aligment_request: bool = False,
+    trp_limit: int = 80,
+    increase_sequence: int = 20,
 ):
     allele_obj = AlleleCalling(
         sample_file,
@@ -372,6 +493,8 @@ def parallel_execution(
         inf_alle_obj,
         snp_request,
         aligment_request,
+        trp_limit,
+        increase_sequence
     )
     sample_name = Path(sample_file).stem
     stderr.print(f"[green] Analyzing sample {sample_name}")
@@ -389,7 +512,7 @@ def collect_data(
     def stats_graphics(stats_folder: str, summary_result: dict) -> None:
         stderr.print("Creating graphics")
         log.info("Creating graphics")
-        allele_types = ["NIPHEM", "NIPH", "EXC", "PLOT", "ASM", "ALM", "INF", "LNF"]
+        allele_types = ["NIPHEM", "NIPH", "EXC", "PLOT", "ASM", "ALM", "INF", "LNF", "TPR"]
         # inizialize classification data
         classif_data = {}
         for allele_type in allele_types:
@@ -430,7 +553,7 @@ def read_reference_alleles(ref_alleles: list) -> dict[dict]:
     summary_result_file = os.path.join(output, "allele_calling_summary.csv")
     sample_allele_match_file = os.path.join(output, "allele_calling_match.csv")
     sample_allele_detail_file = os.path.join(output, "matching_contig.csv")
-    allele_types = ["NIPHEM", "NIPH", "EXC", "PLOT", "ASM", "ALM", "INF", "LNF"]
+    allele_types = ["NIPHEM", "NIPH", "EXC", "PLOT", "ASM", "ALM", "INF", "LNF", "TPR"]
     detail_heading = [
         "sample",
         "contig",
@@ -446,7 +569,10 @@ def read_reference_alleles(ref_alleles: list) -> dict[dict]:
         "gene notation",
         "product notation",
         "allele quality",
-        "sequence",
+        "match sequence",
+        "reference allele sequence",
+        "protein conversion result",
+        "predicted protein sequence",
     ]
 
     summary_result = {}  # used for summary file and allele classification graphics
@@ -501,25 +627,26 @@ def read_reference_alleles(ref_alleles: list) -> dict[dict]:
                         fo.write(",".join(detail_value) + "\n")
     # save snp to file if requested
     if snp_request:
-        snp_file = os.path.join(output, "snp_data.csv")
-        with open(snp_file, "w") as fo:
-            fo.write(
-                "Sample name,Locus name,Reference allele,Position,Ref,Alt,Codon Ref,Codon Alt,Amino Ref,Amino Alt,Category Ref,Category Alt\n"
-            )
+        for result in results:
             for sample, values in result.items():
-                for allele, snp_data in values["snp_data"].items():
-                    for ref_allele, snp_info_list in snp_data.items():
-                        for snp_info in snp_info_list:
-                            fo.write(
-                                sample
-                                + ","
-                                + allele
-                                + ","
-                                + ref_allele
-                                + ","
-                                + ",".join(snp_info)
-                                + "\n"
-                            )
+                snp_file = os.path.join(output, sample + "_snp_data.csv")
+                with open(snp_file, "w") as fo:
+                    fo.write(
+                        "Sample name,Locus name,Reference allele,Position,Ref,Alt,Codon Ref,Codon Alt,Amino Ref,Amino Alt,Category Ref,Category Alt\n"
+                    )
+                    for allele, snp_data in values["snp_data"].items():
+                        for ref_allele, snp_info_list in snp_data.items():
+                            for snp_info in snp_info_list:
+                                fo.write(
+                                    sample
+                                    + ","
+                                    + allele
+                                    + ","
+                                    + ref_allele
+                                    + ","
+                                    + ",".join(snp_info)
+                                    + "\n"
+                                )
     # create alignment files
     if aligment_request:
         alignment_folder = os.path.join(output, "alignments")
@@ -545,13 +672,11 @@ def read_reference_alleles(ref_alleles: list) -> dict[dict]:
             for ref_id, ref_seq in ref_alleles_seq[a_list].items():
                 input_buffer = StringIO()
                 # get the reference allele sequence
-                input_buffer.write(">" + ref_id + "\n")
+                input_buffer.write(">Ref_" + ref_id + "\n")
                 input_buffer.write(str(ref_seq) + "\n")
                 # get the sequences for sample on the same allele
                 for result in results:
                     for sample, values in result.items():
-                        # add sample and allele name
-                        # pdb.set_trace()
                         # discard the allele if it is LNF
                         if values["allele_type"][a_list] == "LNF":
                             continue
@@ -568,14 +693,20 @@ def read_reference_alleles(ref_alleles: list) -> dict[dict]:
                         # get the sequence of the allele in sample
                         input_buffer.write(values["allele_details"][a_list][14] + "\n")
                     input_buffer.seek(0)
-                    # pdb.set_trace()
 
                 allele_multiple_align.append(
                     taranis.utils.get_multiple_alignment(input_buffer)
                 )
+                # release memory
                 input_buffer.close()
-                pdb.set_trace()
             # save multiple alignment to file
+            with open(
+                os.path.join(alignment_folder, a_list + "_multiple_alignment.aln"), "w"
+            ) as fo:
+                for alignment in allele_multiple_align:
+                    for align in alignment:
+                        fo.write(align)
 
     # Create graphics
     stats_graphics(output, summary_result)
+    return
diff --git a/taranis/utils.py b/taranis/utils.py
index f11c0c0..43beb10 100644
--- a/taranis/utils.py
+++ b/taranis/utils.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+import Bio.Data.CodonTable
 import glob
 import gzip
 import io
@@ -22,8 +23,13 @@
 from pathlib import Path
 from Bio import SeqIO
 from Bio.Seq import Seq
+from collections import OrderedDict
 
 
+import warnings
+from Bio import BiopythonWarning
+
+import pdb
 log = logging.getLogger(__name__)
 
 
@@ -111,6 +117,51 @@ def check_additional_programs_installed(software_list: list) -> None:
             sys.exit(1)
     return
 
+def convert_to_protein(sequence: str, force_coding: bool =False, check_additional_bases: bool = False) -> dict:
+    """Check if the input sequence is a coding protein.
+
+    Args:
+        sequence (str): sequence to be checked
+        force_coding (bool, optional): force to check if sequence is coding. Defaults to False.
+        check_additional_bases (bool, optional): if not multiple by 3 remove the latest sequences to check they are added after the stop codon. Defaults to False.
+
+    Returns:
+        dict: protein sequence and/or error message
+    """
+    conv_result = {}
+    # checck if exists start codon
+    if sequence[0:3] not in START_CODON_FORWARD:
+        return {"error": "Sequence does not have a start codon"}
+    if len(sequence) % 3 != 0:
+        if not check_additional_bases:
+            return {"error" : "Sequence is not a multiple of three"}
+        # Remove the last or second to last bases to check if there is a stop codon
+        new_seq_len = len(sequence) // 3 * 3
+        sequence = sequence[:new_seq_len]
+        # this error will be overwritten if another error is found
+        conv_result["error"] = "additional bases added after stop codon"
+
+    seq_sequence = Seq(sequence)   
+    try:
+        seq_prot = seq_sequence.translate(table=1, cds=force_coding)
+    except Bio.Data.CodonTable.TranslationError as e:
+        log.info("Unable to translate sequence. Info message: %s ", e)
+        return {"error": e}
+    # get the latest stop codon
+    last_stop = seq_prot.rfind("*")
+    # if force_coding is False, check if there are multiple stop codons
+    if not force_coding:
+        first_stop = seq_prot.find("*")
+        if first_stop != last_stop:
+            return {"error": "Multiple stop codons","protein": str(seq_prot)}
+    if last_stop != len(seq_prot) - 1:
+        return {"error": "Last sequence is not a stop codon", "protein": str(seq_prot)}
+    if "error" in conv_result:
+        conv_result["protein"] = str(seq_prot)
+        return conv_result
+    return {"protein": str(seq_prot)}       
+    
+
 
 def create_annotation_files(
     fasta_file: str,
@@ -334,7 +385,6 @@ def get_multiple_alignment(input_buffer: io.StringIO) -> list[str]:
     Returns:
         list[str]: list of aligned sequences
     """
-    #
     output_buffer = io.StringIO()
     # Run MAFFT
     mafft_command = "mafft --auto --quiet -"  # "-" tells MAFFT to read from stdin
@@ -353,7 +403,7 @@ def get_multiple_alignment(input_buffer: io.StringIO) -> list[str]:
 
     # Close the file objects and process
     output_buffer.close()
-    process.close()
+    process.stdout.close()
 
     return multi_result
 
@@ -374,13 +424,24 @@ def get_snp_information(
     Returns:
         dict: key: ref_sequence, value: list of snp information
     """
+    # Supress warning that len of alt sequence  not a multiple of three
+    warnings.simplefilter('ignore', BiopythonWarning)
     snp_info = {}
     ref_protein = str(Seq(ref_sequence).translate())
-    alt_protein = str(Seq(alt_sequence).translate())
+    try:
+        alt_protein = str(Seq(alt_sequence).translate())
+    except Exception as e:
+        import pdb; pdb.set_trace()
+        
+    if len(alt_sequence) %3 != 0:
+        import pdb; pdb.set_trace()
 
     snp_line = []
-    for idx, (ref, alt) in enumerate(zip(ref_sequence, alt_sequence)):
-        if alt != ref:
+    # get the shortest sequence for the loop
+    length_for_snp = min(len(ref_sequence), len(alt_sequence))
+    for idx in range(length_for_snp):
+
+        if ref_sequence[idx] != alt_sequence[idx]:
             # calculate the triplet index
             triplet_idx = idx // 3
             # get triplet code
@@ -395,8 +456,8 @@ def get_snp_information(
             snp_line.append(
                 [
                     str(idx),
-                    ref,
-                    alt,
+                    ref_sequence[idx],
+                    alt_sequence[idx],
                     ref_triplet,
                     alt_triplet,
                     ref_aa,
@@ -604,7 +665,22 @@ def read_compressed_file(
     return out_data
 
 
-def read_fasta_file(fasta_file):
+def read_fasta_file(fasta_file: str, convert_to_dict=False) -> dict | str:
+    """Read the fasta file and return the data as a dictionary if convert_to_dict
+
+    Args:
+        fasta_file (str): _description_
+        convert_to_dict (bool, optional): _description_. Defaults to False.
+
+    Returns:
+        dict: fasta id as key and sequence as value in str format
+    """
+    conv_fasta = OrderedDict()
+    if convert_to_dict:
+        with open(fasta_file, "r") as fh:
+            for record in SeqIO.parse(fh, "fasta"):
+                    conv_fasta[record.id] = str(record.seq)
+        return conv_fasta 
     return SeqIO.parse(fasta_file, "fasta")