-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstring analysis input
53 lines (39 loc) · 1.84 KB
/
string analysis input
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# This script writes a file with the A. thaliana orthologs for the genes
# listed in a text file; the output is the input for the STRING analysis.
# Change the file paths as appropriate.


def strip_transcript_suffix(gene_id):
    """Return gene_id without a trailing transcript suffix such as '.t1'.

    Only a suffix of the form '.t<digits>' at the very end of the ID is
    removed, so 'g12.t10' becomes 'g12' (a plain substring replace of '.t1'
    would turn it into 'g120'). IDs without such a suffix are returned
    unchanged.
    """
    base, sep, isoform = gene_id.rpartition('.t')
    if sep and isoform.isdigit():
        return base
    return gene_id


def build_ortholog_map(lines):
    """Group orthologs by gene from comma-separated 'gene,ortholog' lines.

    Args:
        lines: iterable of text lines (for example an open file). The first
            column is the gene ID, the second column is its ortholog.

    Returns:
        dict mapping each gene ID (transcript suffix removed) to the list of
        orthologs seen for it, in input order.

    Lines with fewer than two columns (for example blank lines) are skipped
    instead of raising IndexError.
    """
    ortholog_map = {}
    for line in lines:
        cols = line.strip().split(',')
        if len(cols) < 2:
            continue
        gene = strip_transcript_suffix(cols[0].strip())
        ortho = cols[1].strip()
        # Create the gene's list on first sight, then add the ortholog to it.
        ortholog_map.setdefault(gene, []).append(ortho)
    return ortholog_map


# File containing all the genes from the orientated file
gene_list_file = '/Users/rhianah/Downloads/Project_3/gatk/GS_1/LABNEN/genes/thalianatext.txt'

# Dictionary from Cochlearia gene to its list of A. thaliana orthologs
genes_dict = {}

if __name__ == "__main__":
    # Only touch the file system when run as a script, not on import.
    with open(gene_list_file, 'r') as f:
        genes_dict = build_ortholog_map(f)
    # Print the dictionary
    print(genes_dict)
# Customise this bit: paths of the gene list to look up and of the output file
gene_list = '/Users/rhianah/Downloads/Project_3/gatk/GS_1/LABNEN/genes/LABNEN_DxyFst_filterfinal.txt'
output_file = '/Users/rhianah/Downloads/Project_3/gatk/GS_1/LABNEN/genes/labnendxyfstortho.txt'


def match_orthologs(gene_names, ortholog_map):
    """Yield (gene_name, orthologs) for every non-blank name in gene_names.

    Args:
        gene_names: iterable of lines, one gene name per line (for example
            an open file); surrounding whitespace is stripped.
        ortholog_map: dict mapping gene name to its list of orthologs.

    Yields:
        Tuples of the stripped gene name and its ortholog list, or None in
        place of the list when the gene is not a key of ortholog_map.

    Blank lines are skipped so they are not reported as genes without
    orthologs.
    """
    for raw_name in gene_names:
        gene_name = raw_name.strip()
        if not gene_name:
            continue
        yield gene_name, ortholog_map.get(gene_name)


# Number of genes from the gene list that had at least one ortholog entry
count = 0

if __name__ == "__main__":
    # Only touch the file system when run as a script, not on import.
    with open(gene_list, 'r') as f, open(output_file, 'w') as out_f:
        for gene_name, orthologs in match_orthologs(f, genes_dict):
            if orthologs is None:
                print(f"No orthologs found for {gene_name}")
                continue
            # One output line per matched gene: its orthologs, comma-separated
            out_f.write(','.join(orthologs) + '\n')
            print(f"Orthologs for {gene_name}: {orthologs}")
            count += 1
    print(f"number of hits: {count}")