Skip to content

Commit

Permalink
fixing bug when stop codon was not found
Browse files Browse the repository at this point in the history
  • Loading branch information
luis committed Oct 30, 2018
1 parent 863c673 commit b850d56
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions taranis.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,10 +425,10 @@ def get_aligments_for_deletions (sample_seq, query_seq):
def create_summary (samples_matrix_dict, logger) :
summary_dict = {}
summary_result_list = []
summary_heading_list = ['Exact match', 'INF', 'ASM_INSERT', 'ASM_DELETE','ALM_INSERT' ,'ALM_DELETE', 'LNF','NIPH','NIPHEM','PLOT']
summary_heading_list = ['Exact match', 'INF', 'ASM_INSERT', 'ASM_DELETE','ALM_INSERT' ,'ALM_DELETE', 'LNF','NIPH','NIPHEM','PLOT','ERROR']
summary_result_list.append('File\t' + '\t'.join(summary_heading_list))
for key in sorted (samples_matrix_dict) :
summary_dict[key] = {'Exact match':0, 'INF':0, 'ASM_INSERT':0, 'ASM_DELETE':0, 'ALM_INSERT':0, 'ALM_DELETE':0, 'LNF':0, 'NIPH':0, 'NIPHEM':0, 'PLOT':0}
summary_dict[key] = {'Exact match':0, 'INF':0, 'ASM_INSERT':0, 'ASM_DELETE':0, 'ALM_INSERT':0, 'ALM_DELETE':0, 'LNF':0, 'NIPH':0, 'NIPHEM':0, 'PLOT':0, 'ERROR':0}
for values in samples_matrix_dict[key] :
if 'INF_' in values :
summary_dict[key]['INF'] += 1
Expand All @@ -448,6 +448,8 @@ def create_summary (samples_matrix_dict, logger) :
summary_dict[key]['NIPHEM'] += 1
elif 'PLOT' in values :
summary_dict[key]['PLOT'] += 1
elif 'ERROR' in values :
summary_dict[key]['ERROR'] += 1
else:
try:
number =int(values)
Expand All @@ -469,7 +471,9 @@ def create_summary (samples_matrix_dict, logger) :
return summary_result_list



def loadingBar(count, total, size):
    """Draw a one-line text progress bar on standard output.

    The line starts with a carriage return so that each call overwrites
    the previous one. It shows ``count/total`` (each zero-padded to at
    least three digits) followed by a bracketed bar made of ten segments,
    every segment being ``size`` characters wide.

    Args:
        count: Number of items processed so far.
        total: Total number of items to process. When it is zero the bar
            is drawn empty instead of raising ``ZeroDivisionError``.
        size: Width, in characters, of each of the ten bar segments.
    """
    # An empty workload has no meaningful ratio; report 0% rather than crash.
    if float(total):
        percent = float(count) / float(total) * 100
    else:
        percent = 0.0
    # Clamp to the ten available segments so a count outside 0..total can
    # neither overflow the bar nor change its overall width.
    filled = min(max(int(percent / 10), 0), 10)
    bar = '=' * (filled * size) + ' ' * ((10 - filled) * size)
    sys.stdout.write(
        '\r' + str(int(count)).rjust(3, '0') + '/'
        + str(int(total)).rjust(3, '0') + ' [' + bar + ']'
    )
    # The line has no trailing newline, so a buffered stdout would hold it
    # back; flush explicitly so the bar is redrawn on every call.
    sys.stdout.flush()

def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory, sample_dict_files, blast_db_directory, inputdir, outputdir, cpus , percentlength, schema_variability, logger ):
full_gene_list = []
Expand Down Expand Up @@ -497,9 +501,12 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,
header_snp = ['Sample Name','Core Gene', 'Position','Sequence Sample/Schema','Protein in Sample/Schema', 'Annotation Sample / Schema']
header_protein = ['Sample Name','Core Gene', 'Protein in ' , 'Protein sequence']
header_match_alignment = ['Sample Name','Core Gene','Alignment', 'Sequence']


number_of_genes = len(core_gene_dict_files)
print('Allele calling starts')
for core_file in core_gene_dict_files:
print ( 'Analyzing core file : ', core_file)
#loadingBar(count,total,size)
#print ( 'Analyzing core file : ', core_file)
full_gene_list.append(os.path.basename(core_file))
logger.info('Processing core gene file %s ', core_file)
core_name = os.path.basename(core_file)
Expand All @@ -522,7 +529,6 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,
samples_inferred = []
#allele_list_per_sample = []
for sample_file in sample_dict_files:
#print('sample file is: ', sample_file)
#with open (sample_file,'rb') as sample_f :
# sample_dict = pickle.load(sample_f)
#logger.debug('loaded in memory the sample file %s' , sample_file)
Expand Down Expand Up @@ -780,7 +786,7 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,
sample_gene_sequence = accession_sequence[int(sstart) - 51 : int(send) ]
sample_gene_sequence = sample_gene_sequence.reverse_complement()
else:
sample_gene_sequence = accession_sequence[int(send) -1 : int(sstart) + 51]
sample_gene_sequence = accession_sequence[int(send) -1 : int(sstart) + 51]
else:
if int(sstart) > int (send):
sample_gene_sequence = accession_sequence[int(send) - 51 : int(sstart) ]
Expand Down Expand Up @@ -862,6 +868,7 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,
protein_dict[core_name][sample_value] = nucleotide_to_protein_aligment(new_sseq, qseq )
else:
logger.error('ERROR : Stop codon was not found for the core %s and the sample %s', core_name, sample_value)
samples_matrix_dict[sample_value].append('ERROR not stop codon when deletion')

#if int(s_length) > int(query_length) :
elif int(s_length) > max(schema_variability[core_name]) :
Expand Down Expand Up @@ -943,10 +950,10 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,


else:
samples_matrix_dict[sample_value].append('ERROR ')
samples_matrix_dict[sample_value].append('ERROR not stop codon when insertion')

print ('ERROR when looking the allele match for core gene ', core_name, 'at sample ', sample_value )

'''
logger.debug ('matching genes = %s', matching_genes_dict)
logger.debug ('---------------------------------------------------')
logger.debug ('sample matrix = %s', samples_matrix_dict)
Expand All @@ -967,7 +974,7 @@ def allele_call_nucleotides ( core_gene_dict_files, reference_query_directory,
logger.debug ('---------------------------------------------------')
logger.debug ('list of proteins = %s' , protein_dict)
logger.debug ('---------------------------------------------------')

'''



Expand Down

0 comments on commit b850d56

Please sign in to comment.