-
Notifications
You must be signed in to change notification settings - Fork 3
/
Split_in_juncfiles.py
executable file
·72 lines (57 loc) · 2.25 KB
/
Split_in_juncfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
@authors: Juan L. Trincado
@email: [email protected]
Split_in_juncfiles.py: Takes the table of junction read counts for all samples
and generates one .junc file per sample for running LeafCutter.
"""
import sys
import pandas as pd
import time
import os
def main():
    """Split a junction read-count table into one ``.junc`` file per sample.

    Command-line arguments (read from ``sys.argv``):
        1. Path to the tab-separated read-count table.
        2. Output directory; it is created if it does not exist.

    For every column from positional index 8 onward (one per sample) a file
    ``<output>/<column name>.junc`` is written, tab-separated and without a
    header, containing in this order: table columns 1, 2, 3, a literal ".",
    the sample's column, and table column 4 (presumably chrom, start, end
    and strand -- confirm against the input table layout). The path of every
    generated file is also appended, one per line, to
    ``<output>/index_juncfiles.txt``.

    The function never returns normally: it terminates the process with
    exit status 0 on success and 1 on any error (after printing the error).
    """
    try:
        if len(sys.argv) < 3:
            # Fail with an actionable message instead of a bare IndexError.
            raise ValueError(
                "usage: Split_in_juncfiles.py <readCounts_path> <output_path>"
            )
        readCounts_path = sys.argv[1]
        output_path = sys.argv[2]

        # 1. Load the junction file with pandas.
        # NOTE: the message text is kept unchanged for log compatibility,
        # although what is loaded here is the junction read-count table.
        print("Loading phenotype...")
        read_counts = pd.read_table(readCounts_path, delimiter="\t")

        # 2. Per sample column, build a data frame and save it.
        # Create the output directory if it doesn't exist.
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        # The index file records the location of each generated .junc file.
        # The context manager guarantees it is closed even if a write fails.
        index_path = output_path + "/index_juncfiles.txt"
        with open(index_path, "w") as index_file:
            for i in range(8, len(read_counts.columns)):
                print(i)
                # Select by position; the explicit copy makes sure we modify
                # a private frame rather than a slice of the full table.
                junc_df = read_counts.iloc[:, [1, 2, 3, i, 4]].copy()
                # Place the "." placeholder column right before the counts.
                # Inserting by position avoids reordering by column name,
                # and allow_duplicates keeps a sample that happens to be
                # called "aux" from breaking the run.
                junc_df.insert(3, "aux", ".", allow_duplicates=True)

                sample_id = read_counts.columns.values[i]
                junc_path = output_path + "/" + sample_id + ".junc"

                # Save the data frame; '%.f' writes float counts without
                # decimals.
                print("Creating files " + junc_path + "...")
                junc_df.to_csv(junc_path, sep="\t", index=False,
                               float_format='%.f', header=False)

                # Save the path to each junc file in the index file.
                index_file.write(junc_path + "\n")

        print("Done. Exiting program. " + time.strftime('%H:%M:%S') + "\n\n")
    except Exception as error:
        print('\nERROR: ' + repr(error))
        print("Aborting execution")
        sys.exit(1)
    # Use sys.exit rather than the interactive-only ``exit`` helper.
    sys.exit(0)
if __name__ == "__main__":
    # Run the splitter only when this file is executed as a script,
    # not when it is imported as a module.
    main()