-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrearranger2.R
175 lines (123 loc) · 5.49 KB
/
rearranger2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Rearrange csv files for Jayanti
# Install packages. Only packages that are not installed yet are downloaded, so it is
# safe to run this section (or source the whole script) as often as you like.
# installr is only needed for install.rtools() further down, which is a Windows-only
# step, so it is skipped on other operating systems.
is_windows <- identical(.Platform$OS.type, "windows")
needed_packages <- c("openxlsx", "stringr")
if (is_windows) {
  needed_packages <- c(needed_packages, "installr")
}
is_installed <- vapply(needed_packages, requireNamespace, logical(1), quietly = TRUE)
if (any(!is_installed)) {
  install.packages(needed_packages[!is_installed])
}
# Load packages. You only need to run this once per R session. It won't hurt anything if
# you run it again, it just takes a little time.
library(openxlsx)
library(stringr)
if (is_windows) {
  library(installr)
}
# Rtools is installed only when no zip command can be found (R_ZIPCMD is unset and
# there is no "zip" on the PATH), because the install takes a long time.
# Make sure to check the PATH option during the install and/or run the Sys.setenv()
# line below afterwards. You may need to update the file path to where Rtools is installed.
zip_configured <- !identical(Sys.getenv("R_ZIPCMD", "zip"), "zip") ||
  nzchar(Sys.which("zip")[[1]])
if (is_windows && !zip_configured) {
  install.rtools()
}
# Sys.setenv(R_ZIPCMD= "C:/Rtools/bin/zip")
# if you've installed Rtools correctly you should be able to run the following command and get
# a similar response. As long as the response to the command is not "zip" you should be set.
# > Sys.getenv("R_ZIPCMD", "zip")
# [1] "C:/Rtools/bin/zip"
#### INPUTS - edit the values in this section before running anything below.
# Path to the head folder, which holds one sub-folder per Rep.
# Write the path with forward slashes, even on Windows where paths are
# normally written with backslashes.
headfolder <- "/Users/ericroden/Downloads/Jayanti/"
# File name of the Excel workbook to create. It must end in .xlsx
outputName <- "testOutput.xlsx"
# The CSV files are read from the sub-folders of the head folder.
# NOTE: EVERY CSV FILE FOUND IN THOSE SUB-FOLDERS IS READ, SO KEEP ONLY THE
# CSV FILES YOU WANT PROCESSED IN THEM.
# Lower and upper limits of the rt window to keep
rt.min <- 11.625
rt.max <- 11.715
####
#### Load these functions
# Works with arranger() to fill the workbook: one worksheet per folder, and within
# a sheet one block of columns per CSV file, separated by a blank spacer column.
#
# Arguments:
#   wb       - openxlsx workbook to add the worksheets to (modified in place).
#   dirsList - character vector of folder paths, one per Rep. Relative paths are
#              resolved against the current working directory.
#   rt.min, rt.max - rt window limits, passed on to arranger().
#
# Returns the workbook invisibly. Nothing is saved to disk here.
write.arranged <- function(wb, dirsList, rt.min, rt.max) {
  # seq_along() simply skips the loop when there are no folders
  for (i in seq_along(dirsList)) {
    repDir <- dirsList[i]
    # the sheet is named after the folder itself (e.g. "./Rep1" -> "Rep1")
    sheetName <- basename(repDir)
    addWorksheet(wb, sheetName)
    # Only files whose name ends in .csv (any letter case). Listing with full
    # paths means the working directory never has to be changed.
    fileList <- list.files(repDir, pattern = "\\.csv$", ignore.case = TRUE,
                           full.names = TRUE)
    # first free column on this sheet; advanced after every file so that files
    # with different numbers of columns never overlap
    nextCol <- 1
    for (j in seq_along(fileList)) {
      # arrange each file
      arranged <- arranger(fileList[j], rt.min, rt.max)
      # label row: the sample id in the first cell, the remaining cells empty
      sampleNumber <- paste0("Sample", j, "-", i)
      topRow <- data.frame(t(c(sampleNumber, rep(NA, ncol(arranged) - 1))),
                           stringsAsFactors = FALSE)
      colnames(topRow) <- colnames(arranged)
      arranged <- rbind(topRow, arranged)
      # address the sheet by name so workbooks that already contain other
      # sheets are handled correctly
      writeData(wb, sheetName, arranged, startCol = nextCol, colNames = FALSE)
      # leave one blank spacer column before the next file
      nextCol <- nextCol + ncol(arranged) + 1
    }
  }
  invisible(wb)
}
# Reads one CSV file and rearranges it from stacked per-ion blocks into a single
# wide table, keeping only the rows inside the requested rt window.
#
# Only the first two columns of the file are used, and the first data row is
# discarded. Every row whose first column contains "Ion" starts a new block.
# Column 1 of the first block supplies the rt values and column 2 of each block
# supplies that ion's values. Blocks are lined up by row position, so every
# block must have the same number of rows.
#
# Arguments:
#   filename - path of the CSV file to read.
#   rt.min, rt.max - inclusive lower and upper limits of the rt window.
#
# Returns a data frame with columns rt, peak1, peak2, ... Its first row holds the
# labels ("RT" followed by the ion names) and the remaining rows are the data
# inside the window. Nothing is written to disk by this function.
arranger <- function(filename, rt.min, rt.max) {
  # read in the csv
  raw <- read.csv(filename, stringsAsFactors = FALSE)
  if (nrow(raw) < 2 || ncol(raw) < 2) {
    stop("'", filename, "' needs at least two columns and two data rows.",
         call. = FALSE)
  }
  # knock off the unneeded row at the top and remove the excess columns
  data1 <- raw[2:nrow(raw), c(1, 2)]
  # rows that start a new ion block
  ionRows <- which(grepl("Ion", data1[, 1]))
  if (length(ionRows) == 0) {
    stop("No 'Ion' header rows found in '", filename, "'.", call. = FALSE)
  }
  nIons <- length(ionRows)
  # block boundaries: every ion row plus one position past the last data row
  cutpoints <- c(ionRows, nrow(data1) + 1)
  # Column labels: keep each ion name up to (not including) its own second
  # space, so names of different lengths are each cut in the right place.
  ionNames <- sub("^([^ ]* [^ ]*) .*$", "\\1", data1[ionRows, 1])
  colHeads <- c("RT", ionNames)
  # row positions belonging to each block
  blockRows <- lapply(seq_len(nIons), function(k) {
    cutpoints[k]:(cutpoints[k + 1] - 1)
  })
  # blocks are combined by position, so unequal lengths would misalign the data
  if (length(unique(lengths(blockRows))) != 1) {
    stop("Ion blocks in '", filename, "' do not all have the same number of rows.",
         call. = FALSE)
  }
  # the first block supplies both the rt column and the first peak column
  df <- data1[blockRows[[1]], 1:2]
  colnames(df) <- c("rt", "peak1")
  # every further block only adds its value column; rt is kept just once
  for (k in seq_len(nIons)[-1]) {
    df[[paste0("peak", k)]] <- data1[blockRows[[k]], 2]
  }
  # Compare rt as numbers. The column is text because it also holds the "Ion"
  # header line, and comparing text with numbers would be a string comparison.
  # Non-numeric entries become NA (warning deliberately silenced) and are
  # dropped by which().
  rtValues <- suppressWarnings(as.numeric(as.character(df$rt)))
  inWindow <- which(rtValues >= rt.min & rtValues <= rt.max)
  returned <- df[inWindow, , drop = FALSE]
  # put the label row on top of the selected data
  headerRow <- data.frame(t(colHeads), stringsAsFactors = FALSE)
  colnames(headerRow) <- colnames(returned)
  rbind(headerRow, returned)
}
####
#### Run this section
# Work from the head folder so the sub-folders can be listed as relative
# paths (e.g. "./Rep1").
setwd(headfolder)
# get list of folders contained directly within the head folder
dirsList <- list.dirs(recursive = FALSE)
# Stop here with a clear message if there is nothing to process, instead of
# failing later while the workbook is being built or saved.
if (length(dirsList) == 0) {
  stop("No sub-folders found in ", headfolder, "; expected one folder per Rep.",
       call. = FALSE)
}
# NOTE: THIS SCRIPT WORKS BY GETTING ALL OF THE CSV FILES IN THE SUBFOLDERS (BELOW THE HEADFOLDER)
# SO MAKE SURE THE ONLY CSV FILES IN THOSE FOLDERS ARE THE ONES YOU WANT TO READ.
# create an empty xlsx workbook
wb <- createWorkbook()
# fill the workbook: one sheet per folder, one block of columns per CSV file
write.arranged(wb, dirsList = dirsList, rt.min = rt.min, rt.max = rt.max)
# Nothing has been saved yet; the two commands below write the workbook into the head folder.
# Note: this will overwrite any existing workbook with the same name.
setwd(headfolder)
saveWorkbook(wb, file = outputName, overwrite = TRUE)