-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lab 9.qmd
118 lines (92 loc) · 2.36 KB
/
Lab 9.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
---
title: "Lab 9"
author: "Dana Gonzalez"
format: html
editor: visual
embed-resources: true
theme: cosmo
---
# Load Libraries
```{r, message=FALSE, results='hide'}
library(microbenchmark)
library(dplyr)
library(tidyverse)
library(parallel)
```
# Problem 1
```{r, message=FALSE, warning = FALSE}
# Check Number of Cores
# Prints how many CPU cores the `parallel` package detects on this machine;
# useful context when picking the `ncpus` value passed to `my_boot()` below.
parallel::detectCores()
# "Slow" Version
#' Simulate an n-by-k matrix of Poisson draws (deliberately "slow" version)
#'
#' Builds the result one row at a time with `rbind()`. Growing the matrix
#' inside the loop is intentional: this is the naive baseline that
#' `fun1alt()` is benchmarked against.
#'
#' @param n Number of rows to generate.
#' @param k Number of Poisson draws per row (i.e. number of columns).
#' @param lambda Mean of the Poisson distribution.
#' @return A matrix of Poisson counts with `n` rows and `k` columns.
fun1 <- function(n = 100, k = 4, lambda = 4) {
  # Start from an empty 0 x k matrix so that n = 0 yields a 0 x k result
  # rather than NULL.
  x <- matrix(integer(0), nrow = 0L, ncol = k)
  # seq_len() iterates zero times when n = 0, whereas 1:n would run for
  # i = 1 and i = 0 and append two spurious rows.
  for (i in seq_len(n)) {
    x <- rbind(x, rpois(k, lambda))
  }
  x
}
# "Fast" Version
#' Simulate an n-by-k matrix of Poisson draws (vectorised version)
#'
#' Draws all `n * k` values with a single `rpois()` call and reshapes them
#' into a matrix, instead of appending one row at a time.
#'
#' @param n Number of rows.
#' @param k Number of columns.
#' @param lambda Mean of the Poisson distribution.
#' @return A matrix of Poisson counts with `n` rows and `k` columns.
fun1alt <- function(n = 100, k = 4, lambda = 4) {
  draws <- rpois(n * k, lambda)
  matrix(draws, nrow = n, ncol = k)
}
# With the default arguments both versions should report the same dimensions
dim(fun1())
dim(fun1alt())
# Column-wise summaries of a fresh draw from each version, to compare the
# distribution of the generated values
summary(fun1())
summary(fun1alt())
# Time the two implementations against each other
microbenchmark::microbenchmark(
  fun1(),
  fun1alt()
)
```
# Problem 2
```{r, message=FALSE, warning = FALSE}
# Data generating process: 1e4 standard-normal draws arranged as a matrix
# with 10 rows and 1,000 columns (seeded for reproducibility)
set.seed(1234)
x <- matrix(rnorm(1e4), nrow = 10, ncol = 1000)
# Find Each Column's Max Value
#' Column-wise maximum of a matrix (baseline `apply()` version)
#'
#' @param x A numeric matrix.
#' @return A vector holding the largest value found in each column of `x`.
fun2 <- function(x) {
  apply(x, MARGIN = 2, FUN = max)
}
#' Column-wise maximum of a matrix (vectorised `max.col()` version)
#'
#' Returns the same values as `fun2()` but avoids calling `max()` once per
#' column: the position of every column's maximum is located in one
#' `max.col()` call and the values are then pulled out with matrix indexing.
#'
#' @param x A numeric matrix.
#' @return A vector holding the largest value found in each column of `x`,
#'   named by `colnames(x)` when those are present.
fun2alt <- function(x) {
  # max.col() works row-wise, so transpose to obtain, for every column of x,
  # the row index of its largest entry. ties.method = "first" keeps the
  # lookup exact and deterministic; the default "random" breaks ties with
  # the RNG and compares with a tolerance.
  row_of_max <- max.col(t(x), ties.method = "first")
  # A two-column index matrix selects one (row, column) cell per column.
  col_max <- x[cbind(row_of_max, seq_len(ncol(x)))]
  # apply() carries column names over to its result; mirror that here.
  names(col_max) <- colnames(x)
  col_max
}
# Compute the column maxima once with each version and preview them.
# (The results are already the maxima, so they are shown directly; calling
# max.col() on such a vector would only return a column of 1s.)
results_fun2 <- fun2(x)
results_fun2alt <- fun2alt(x)
head(results_fun2)
head(results_fun2alt)
# Check Outputs of "fun2" and "fun2alt"
identical(results_fun2, results_fun2alt)
# Checking Speeds of "fun2" and "fun2alt" on the same input matrix
microbenchmark::microbenchmark(
  fun2(x),
  fun2alt(x)
)
```
# Problem 3
```{r, message=FALSE, warning = FALSE}
#' Nonparametric bootstrap of a statistic, optionally in parallel
#'
#' Resamples the rows of `dat` with replacement `R` times and evaluates
#' `stat` on every resample. All resampling indices are drawn up front in the
#' calling process, so for a deterministic `stat` the result depends only on
#' the RNG state at the time of the call, not on `ncpus`.
#'
#' @param dat A data frame or matrix whose rows are resampled.
#' @param stat A function that takes a resampled copy of `dat` and returns a
#'   vector; it should return the same length on every call so the results
#'   can be stacked.
#' @param R Number of bootstrap replicates.
#' @param ncpus Number of cores handed to `parallel::mclapply()`.
#' @return The per-replicate results bound together by row: one row per
#'   replicate, one column per element returned by `stat`.
my_boot <- function(dat, stat, R, ncpus = 1L) {
  stopifnot(is.function(stat))

  # mclapply() relies on forking, which Windows does not provide; there it
  # stops with an error for mc.cores > 1, so fall back to a single core.
  if (.Platform$OS.type == "windows" && ncpus > 1L) {
    warning(
      "Forking is not available on Windows; running on a single core.",
      call. = FALSE
    )
    ncpus <- 1L
  }

  # Getting the Random Indices: column i holds the rows of replicate i
  n <- nrow(dat)
  idx <- matrix(sample.int(n, n * R, TRUE), nrow = n, ncol = R)

  # Parallelization
  ans <- parallel::mclapply(seq_len(R), function(i) {
    stat(dat[idx[, i], , drop = FALSE])
  }, mc.cores = ncpus)

  # With forked workers a failure comes back as a "try-error" element
  # instead of an error; surface it rather than binding it into the output.
  failed <- vapply(ans, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop(
      sum(failed), " bootstrap replicate(s) failed; first error: ",
      as.character(ans[[which(failed)[1L]]]),
      call. = FALSE
    )
  }

  # Converting the List Into a Matrix
  do.call(rbind, ans)
}
# Check Results by Comparing to a Parametric Model
#' Bootstrap statistic: coefficients of the regression of `y` on `x`
#'
#' @param d A data frame containing the columns `x` and `y`.
#' @return The coefficient vector of `lm(y ~ x)` fitted to `d`.
my_stat <- function(d) {
  fit <- lm(y ~ x, data = d)
  coef(fit)
}
# Simulate y = 5 * x + noise, then compare bootstrap percentile intervals
# with the parametric confidence intervals from lm()
set.seed(1)
n <- 500
R <- 1e4
x <- cbind(rnorm(n))
y <- x * 5 + rnorm(n)
# Parametric 95% confidence intervals for the intercept and slope
ans0 <- confint(lm(y ~ x))
# Bootstrap distribution of the same coefficients
ans1 <- my_boot(dat = data.frame(x, y), stat = my_stat, R = R, ncpus = 2L)
# Bootstrap percentile intervals: 2.5% and 97.5% quantiles per coefficient
t(apply(ans1, 2, quantile, c(.025, .975)))
# confint() already returns the interval bounds, so show them as they are
# (taking quantiles of the two bounds would not give a meaningful interval)
ans0
# Checking Speed
system.time(my_boot(dat = data.frame(x, y), stat = my_stat, R = R, ncpus = 1L))
system.time(my_boot(dat = data.frame(x, y), stat = my_stat, R = R, ncpus = 2L))
```