-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglobal.R
82 lines (67 loc) · 2.7 KB
/
global.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
library(plyr)
library(dplyr)
require(RCurl)
# load data through dropbox
# d2<-read.csv(url('https://www.dropbox.com/s/9xf62j35vja2n0h/Budworm%20traps%202015%20with%20GPSgoogle.csv?raw=1'), header = FALSE, stringsAsFactors = FALSE)
# Read one trap-count sheet stored in "wide" layout and return it as a long
# table with one row per usable count.
#
# Layout the code relies on (the CSV is read without a header row):
#   rows 1-7 : trap metadata. Column 1 holds the field label, every further
#              column describes one trap. The labels State, Location,
#              District, Latitude, Longitude and Operator must be present.
#   rows 8+  : one row per sampling date. Column 1 is the date written as
#              dd/mm/yyyy, every further column is that trap's cell.
#
# Args:
#   myCsv: anything read.csv() accepts as `file` (path, URL, connection).
#
# Returns:
#   A data.frame with the columns date, count, misc, id, state, location,
#   district, latitude, longitude, operator, yearweek (in that order).
#   Only cells that parse as a number and belong to a trap with a numeric
#   longitude are kept, so `misc` is always NA in the result. `id` is a
#   factor built from the trap's column position (1 = first trap column) and
#   `yearweek` is a copy of `date`. Rows are ordered trap by trap, and by
#   sheet row within a trap.
#
# Raises:
#   An error when the sheet has fewer than 7 rows, has no trap column, or
#   lacks one of the required metadata labels.
process_raw_data <- function(myCsv) {
  n_meta <- 7L
  trap_fields <- c("State", "Location", "District",
                   "Latitude", "Longitude", "Operator")

  raw <- read.csv(myCsv, header = FALSE, stringsAsFactors = FALSE)
  if (nrow(raw) < n_meta || ncol(raw) < 2L) {
    stop("trap sheet must have at least ", n_meta,
         " metadata rows and one trap column", call. = FALSE)
  }

  # ---- observation cells (rows after the metadata) ----
  # Negative indexing instead of 8:nrow(raw): the latter counts downwards
  # when the sheet holds metadata only.
  obs_raw <- raw[seq_len(nrow(raw))[-seq_len(n_meta)], , drop = FALSE]
  dates <- as.Date(obs_raw[[1L]], format = "%d/%m/%Y")

  # as.matrix() pads formatted numbers with blanks; strip all whitespace.
  obs <- as.matrix(obs_raw)
  cells <- gsub("\\s+", "", obs)
  dim(cells) <- dim(obs)
  # drop = FALSE keeps a matrix even for a single trap column / single date.
  cells <- cells[, -1L, drop = FALSE]
  cells[!is.na(cells) & cells == ""] <- NA

  # ---- trap metadata: one row per trap, one column per field label ----
  meta <- t(as.matrix(raw[seq_len(n_meta), , drop = FALSE]))
  meta[!is.na(meta) & meta == ""] <- NA
  colnames(meta) <- meta[1L, ]          # first sheet column holds the labels
  meta <- meta[-1L, , drop = FALSE]

  absent <- setdiff(trap_fields, colnames(meta))
  if (length(absent) > 0L) {
    stop("trap sheet is missing metadata row(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }
  for (field in c("State", "Location", "District", "Operator")) {
    meta[is.na(meta[, field]), field] <- "MISSING_INFORMATION"
  }

  # ---- wide -> long ----
  # which(arr.ind = TRUE) walks the matrix column by column, i.e. trap by
  # trap and date by date within a trap.
  hits <- which(!is.na(cells), arr.ind = TRUE)
  date_idx <- unname(hits[, 1L])
  trap_idx <- unname(hits[, 2L])
  values <- cells[hits]

  # Cells that are not numbers are free-text remarks, not counts.
  counts <- suppressWarnings(as.numeric(values))
  remarks <- values
  remarks[!is.na(counts)] <- NA

  trap_meta <- meta[trap_idx, trap_fields, drop = FALSE]
  long <- data.frame(
    date = dates[date_idx],
    count = counts,
    misc = remarks,
    id = trap_idx,
    state = unname(trap_meta[, "State"]),
    location = unname(trap_meta[, "Location"]),
    district = unname(trap_meta[, "District"]),
    latitude = as.numeric(unname(trap_meta[, "Latitude"])),
    longitude = as.numeric(unname(trap_meta[, "Longitude"])),
    operator = unname(trap_meta[, "Operator"]),
    stringsAsFactors = FALSE
  )
  long$yearweek <- long$date

  # Keep plottable numeric observations only.
  keep <- !is.na(long$longitude) & is.na(long$misc)
  cleantable <- long[keep, , drop = FALSE]
  cleantable$id <- factor(cleantable$id)
  rownames(cleantable) <- NULL
  cleantable
}
# Trap counts for each species come from published Google Sheets; every sheet
# is fetched as CSV and reshaped by process_raw_data().

# H. punctigera
punctigera_data <- process_raw_data(
  "https://docs.google.com/spreadsheets/d/16tksOn7SAv4ezJCX_z4no4MpJDtua6pGqVPjfg57OZ8/pub?gid=0&single=true&output=csv"
)

# H. armigera
armigera_data <- process_raw_data(
  "https://docs.google.com/spreadsheets/d/16tksOn7SAv4ezJCX_z4no4MpJDtua6pGqVPjfg57OZ8/pub?gid=1462632689&single=true&output=csv"
)

# S. frugiperda
frugiperda_data <- process_raw_data(
  "https://docs.google.com/spreadsheets/d/e/2PACX-1vTP_GalHvmk6J0MkFTjAbm7NGhgntiYXM5Y-YHcrfFH6eCcgl64NirtgcC88adJxf6jtlCIU2d4FZqr/pub?gid=440708979&single=true&output=csv"
)
# warning('not using internet')
# d2<-read.csv('Helicoverpa trap data - H. punctigera.csv', header = FALSE, stringsAsFactors = FALSE)