-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompoundcounter.py
109 lines (81 loc) · 3.35 KB
/
compoundcounter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
'''
Reads a CSV of target m/z values and writes out, for each one,
how many of the given input CSV files contain that m/z.
'''
# import packages. csv is standard package and pandas is widely used and part of Anaconda packages
import csv
import pandas as pd
def roundto4(x):
    """Return ``x`` rounded to 4 decimal places.

    Exists as a named one-argument function so it can be passed directly
    to ``map``.
    """
    decimal_places = 4
    return round(x, decimal_places)
def finder(in_list, compounds, decimals=4):
    """Count, for each compound m/z, how many input files contain it.

    Every value is rounded to ``decimals`` places before comparison, so two
    m/z values match when they are equal after rounding.

    Args:
        in_list: One iterable of m/z values per input file. It is read
            only; the caller's lists are not modified.
        compounds: The m/z values of interest. Duplicates are allowed and
            do not need to be sorted or adjacent.
        decimals: Number of decimal places used for rounding (default 4).

    Returns:
        A two-item list ``[rounded_compounds, counts]``. ``counts[i]`` is
        the number of files in ``in_list`` that contain
        ``rounded_compounds[i]``; a file counts once no matter how often
        the value occurs in it.
    """
    # Build real lists/sets rather than using ``map``: on Python 3 ``map``
    # returns a one-shot iterator that has no ``len()`` and is exhausted by
    # the first ``in`` test.
    targets = [round(mz, decimals) for mz in compounds]

    # One set per file gives constant-time membership checks.
    per_file = [
        {round(mz, decimals) for mz in file_mzs} for file_mzs in in_list
    ]

    # Counting by position (not via ``list.index``) keeps duplicate
    # compounds correct regardless of where they appear in the list.
    counts = [sum(mz in seen for seen in per_file) for mz in targets]
    return [targets, counts]
def writeToCSV(mzs_and_counts, output_filename):
    """Write m/z values and their counts to a two-column CSV file.

    Args:
        mzs_and_counts: A two-item sequence ``[mzs, counts]`` holding two
            lists of equal length.
        output_filename: Path of the file to create or overwrite. It is
            used exactly as given, so include the ``.csv`` extension.

    The file starts with the header row ``mzs,count`` followed by one row
    per (m/z, count) pair.
    """
    mzs, counts = mzs_and_counts[0], mzs_and_counts[1]
    # Text mode with newline='' is what the csv module expects on Python 3;
    # binary mode ('wb') makes csv.writer raise TypeError when writing str.
    with open(output_filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['mzs', 'count'])
        writer.writerows(zip(mzs, counts))
def readFromCSV(filename):
    """Load the ``mz`` column of a CSV file and return it as a list.

    The file must have a column whose header is exactly ``mz``; the lookup
    fails otherwise.
    """
    mz_column = pd.read_csv(filename)['mz']
    return list(mz_column)
def processDuplicates(files_list, outputname, compound_file):
    """Count compound m/z occurrences across files and save the result.

    Args:
        files_list: Paths of the CSV files to search; each is read with
            ``readFromCSV``.
        outputname: Path of the CSV file the counts are written to.
        compound_file: Path of the CSV listing the m/z values of interest.
            It is read with ``readFromCSV``, so it must have ``mz`` as a
            column header.
    """
    # The compound list is read first, then one m/z list per input file.
    targets = readFromCSV(compound_file)
    per_file_mzs = [readFromCSV(path) for path in files_list]
    results = finder(per_file_mzs, targets)
    writeToCSV(results, outputname)
# Example use (uncomment and adjust the file names to run a comparison).
# Each listed file, and each compound file, needs an 'mz' column header.
# nfiles = [
#     "neg-ACM_sept16_T1R2_GL2_method1.csv",
#     "neg-ACM_sept16_T1R2_GL7_method1.csv",
#     "neg-ACM_sept16_T1R2_GL21_method1.csv",
#     "neg-ACM_sept16_T1R3_GL7_method1.csv",
#     "neg-ACM_sept16_T1R3_GL20_method1.csv",
#     "neg-ACM_sept16_T1R3_GL21_method1.csv"
# ]
#
# pfiles = [
#     "pos-ACM_sept16_T1R2_GL2_method1.csv",
#     "pos-ACM_sept16_T1R2_GL7_method1.csv",
#     "pos-ACM_sept16_T1R2_GL21_method1.csv",
#     "pos-ACM_sept16_T1R3_GL7_method1.csv",
#     "pos-ACM_sept16_T1R3_GL20_method1.csv",
#     "pos-ACM_sept16_T1R3_GL21_method1.csv"
# ]
# compoundfilep = "GLexactmasses-pos.csv"
# compoundfilen = "GLexactmasses-neg.csv"
#
# processDuplicates(nfiles, 'method1-neg-compounds.csv', compoundfilen)
# processDuplicates(pfiles, 'method1-pos-compounds.csv', compoundfilep)

# NOTE(review): this message is printed unconditionally at module level, so
# it appears on every run or import even while the example calls above stay
# commented out and no comparison has actually been performed.
print('compounds have been compared.')