-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathold.R
127 lines (90 loc) · 3.22 KB
/
old.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Load the packages used by this script
library(dplyr)
library(magrittr)
library(lubridate)
library(zoo)
library(ggplot2)
# Load the raw data; dataset.csv is read from the current working directory.
dataset <- read.csv("dataset.csv")

# Report, per column, whether it contains any missing value.
# vapply() keeps the data frame's column types; apply() would first coerce the
# whole data frame to a matrix.
vapply(dataset, anyNA, logical(1))

# Number of entries, and the number of rows in a 10% sample.
nrow(dataset)
sampleSize <- floor(0.1 * nrow(dataset))
sampleSize

# Take a 10% sample of rows: NAdata holds the sampled row positions and
# sampleData the corresponding rows.
# NOTE(review): no set.seed() is called, so the rows chosen differ between runs.
NAdata <- sample(nrow(dataset), sampleSize)
sampleData <- dataset[NAdata, ]

# Blank out column 5 for the sampled rows, then re-check for missing values.
dataset[NAdata, 5] <- NA
vapply(dataset, anyNA, logical(1))

# Independently blank out a further 10% of the rows in each of columns 5 to 7.
# Rows are picked by position: matching sampled *values* with %in% would also
# blank every other row that happens to share one of those values.
# NOTE(review): columns are addressed by position; confirm 5:7 are the
# intended columns of dataset.csv.
dataset[5:7] <- lapply(dataset[5:7], function(values) {
  values[sample(length(values), sampleSize)] <- NA
  values
})
head(dataset)
# Preview of the rows whose State is "Karnataka": keep only the State, Type
# and Total columns and retain the first six rows, then save that preview.
selectFew <- dataset %>%
  filter(State == "Karnataka") %>%
  select(State, Type, Total) %>%
  head(n = 6L)
write.csv(x = selectFew, file = "selectFew.csv")

# Add dummy (indicator) columns to the data set with fastDummies and save
# the widened table.
dummyEx <- dataset %>%
  fastDummies::dummy_cols()
write.csv(x = dummyEx, file = "categories_encoded.csv")
# Mean of Total per Year, restricted to female records.
# Gender_Female is expected to be the 0/1 indicator that
# fastDummies::dummy_cols() derives from the Gender column - confirm against
# the data. Without the filter the summary averages every record, not only
# the female ones.
# na.rm = TRUE: missing values are injected earlier in this script, and a
# single NA would otherwise turn a whole year's mean into NA.
womenDied <- dummyEx %>%
  select(Year, Gender_Female, Total) %>%
  filter(Gender_Female == 1) %>%
  group_by(Year) %>%
  summarise(Died = mean(Total, na.rm = TRUE))

# Line chart of the yearly mean, then save the summary table.
ggplot(data = womenDied, aes(x = Year, y = Died)) +
  geom_line()
write.csv(womenDied, "womenDied.csv")
# Min-max rescale the non-missing values of a numeric vector to [0, 1].
#
# y: numeric vector, possibly containing NA.
#
# Returns a vector of the same length as y. NA positions stay NA; the
# smallest non-missing value maps to 0 and the largest to 1. When all
# non-missing values are equal (including a single value) they map to 0
# instead of NaN. When every value is NA, y is returned unchanged.
normalized <- function(y) {
  observed <- !is.na(y)
  # Nothing to scale: return early so min()/max() are not called on an
  # empty vector (which would warn and yield Inf / -Inf).
  if (!any(observed)) {
    return(y)
  }
  x <- y[observed]
  lowest <- min(x)
  span <- max(x) - lowest
  # Zero range would mean dividing 0 by 0; map such input to 0 instead.
  # isTRUE() keeps the test safe if span is NaN (e.g. infinite input).
  y[observed] <- if (isTRUE(span == 0)) 0 else (x - lowest) / span
  y
}
# Rebuild the yearly table with its last column passed through normalized():
# the first column of womenDied is kept as is and the rescaled last column is
# bound next to it. The result is saved to disk.
womenDiedNorm <- cbind(
  womenDied[1],
  as.data.frame(lapply(womenDied[ncol(womenDied)], normalized))
)
write.csv(x = womenDiedNorm, file = "womenDied_Normalised.csv")

# Normality checks on the rescaled Died column.
# ggpubr: QQ plot and density plot.
library(ggpubr)
ggqqplot(womenDiedNorm$Died)
ggdensity(
  womenDiedNorm$Died,
  xlab = "Women Died",
  main = "Density plot of Women Died"
)

# Shapiro-Wilk test.
shapiro.test(womenDiedNorm$Died)

# Base-graphics QQ plot with a red reference line.
qqnorm(womenDiedNorm$Died, frame = FALSE, pch = 1)
qqline(womenDiedNorm$Died, lwd = 2, col = "red")

# QQ plot from the car package.
library(car)
qqPlot(womenDiedNorm$Died)
# Mean of Total for each Type across the whole data set.
# na.rm = TRUE: this script injects missing values into the data earlier on,
# and mean() returns NA for any group that contains one.
newdata <- dataset %>%
  select(State, Type, Total) %>%
  group_by(Type) %>%
  summarise(Died = mean(Total, na.rm = TRUE))
# Rows whose Type is "Love Affairs", reduced to the State, Year, Type and
# Total columns.
deathByLoveAffairs <- dataset %>%
  filter(Type == "Love Affairs") %>%
  select(State, Year, Type, Total)
# Rows whose Type is "Family Problems", reduced to the State, Year, Type and
# Total columns.
# NOTE(review): the variable is named deathByHanging but the filter selects
# "Family Problems" - confirm which of the two is intended.
deathByHanging <- dataset %>%
  select(State, Year, Type, Total) %>%
  filter(Type == "Family Problems")

# Mean of Total over these rows. mean() must be given the numeric column:
# called on the whole data frame it only returns NA with a warning.
mean(deathByHanging$Total, na.rm = TRUE)

# Trailing (right-aligned) moving average of Total over a 10-row window; the
# first nine rows are filled with NA.
# rollapplyr(..., mean, na.rm = TRUE) is used instead of rollmean() because
# rollmean()'s default method does not handle NA input and its na.pad
# argument is deprecated in favour of fill.
# NOTE(review): the window runs over rows in their current order; the rows
# are not sorted by Year here.
deathByHanging <- deathByHanging %>%
  mutate(AvgDeath = rollapplyr(Total, 10, mean, na.rm = TRUE, fill = NA))
ggplot(data = deathByHanging, aes(x = Year, y = AvgDeath)) +
  geom_line()
# Yearly mean of Total for the "Love Affairs" rows (printed for inspection
# only; the table is not stored).
deathByLoveAffairs %>%
  group_by(Year) %>%
  summarise(Total = mean(Total, na.rm = TRUE))

# Sort by Year and keep the result, so the moving average below runs over the
# rows in chronological order. Previously the sorted table was computed but
# never assigned, so the window ran over the unsorted rows.
# The moving average is trailing (right-aligned) over a 10-row window, with
# the first nine rows filled with NA. rollapplyr(..., mean, na.rm = TRUE) is
# used instead of rollmean() because rollmean()'s default method does not
# handle NA input and its na.pad argument is deprecated in favour of fill.
deathByLoveAffairs <- deathByLoveAffairs %>%
  arrange(Year) %>%
  mutate(AvgDeath = rollapplyr(Total, 10, mean, na.rm = TRUE, fill = NA))
ggplot(data = deathByLoveAffairs, aes(x = Year, y = AvgDeath)) +
  geom_line()
# Trailing (right-aligned) moving average of Total over a 10-row window,
# computed separately within each Type; the first nine rows of each Type are
# filled with NA.
# rollapplyr(..., mean, na.rm = TRUE) is used instead of rollmean() because
# rollmean()'s default method does not handle NA input and its na.pad
# argument is deprecated in favour of fill.
# ungroup() so the stored table does not silently stay grouped by Type.
allDeaths <- dataset %>%
  group_by(Type) %>%
  mutate(AvgDeath = rollapplyr(Total, 10, mean, na.rm = TRUE, fill = NA)) %>%
  ungroup()

# Plot the moving average against Year, one coloured line per Type.
ggplot(data = allDeaths, aes(x = Year, y = AvgDeath, col = Type)) +
  geom_line()