Skip to content

Commit

Permalink
Merge pull request #40 from youngroklee-ml/39-ch4
Browse files Browse the repository at this point in the history
39 ch4
  • Loading branch information
youngroklee-ml authored Mar 7, 2024
2 parents c90fb19 + 32c1bbf commit 086396a
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 288 deletions.
139 changes: 24 additions & 115 deletions chapters/ch04_dimension_reduction.qmd
Original file line number Diff line number Diff line change
@@ -1,155 +1,64 @@
# 4장 차원축소

## (예 4.5, 4.7)

#### 패키지 로드

```{r}
library(Matrix)
```
# 4장 차원축소 회귀분석

## (예 4.5 - 4.8)

#### 데이터 로드

```{r}
x <- matrix(
c(
1, 2, 3, 2,
-1, 0, -1, -1,
0, -2, -2, -1
),
nrow = 3,
byrow = TRUE
)
print(x)
dat1 <- read.csv(file = "data/ch4_dat1.csv")
dat1
x <- as.matrix(dat1)
```

#### (예 4.5)

행렬 차원

```{r}
n <- dim(x)[1]
k <- dim(x)[2]
```

특이치 분해

```{r}
s <- svd(x, nu = n, nv = k)
diag(s$d, nrow = n, ncol = k)
s <- svd(x)
diag(s$d)
s$u
s$v
```

행렬 복원 확인

```{r}
all.equal(x, s$u %*% diag(s$d, nrow = n, ncol = k) %*% t(s$v))
```

#### (예 4.6)

제곱합-교차곱 행렬에 대한 특이치 분해
분산-공분산 행렬의 특이치 분해

```{r}
xtx <- t(x) %*% x
print(xtx)
n_xtx <- dim(xtx)[1]
k_xtx <- dim(xtx)[2]
s_xtx <- svd(xtx, nu = n_xtx, nv = k_xtx)
diag(s_xtx$d, nrow = n_xtx, ncol = k_xtx)
s_xtx$u
s_xtx$v
cov(x)
svd(cov(x))
```

고유값-고유벡터 확인
상관계수 행렬의 특이치 분해

```{r}
xtx %*% s_xtx$v[, 2]
s_xtx$v[, 2] * s_xtx$d[2]
cor(x)
svd(cor(x))
```


#### (예 4.7)

행렬의 rank

```{r}
r <- rankMatrix(x)
```

특이치 분해

```{r}
# singular value decomposition of x
s <- svd(x, nu = r, nv = r)
diag(s$d[1:r])
s$u
s$v
```

행렬 복원 확인

```{r}
all.equal(x, s$u %*% diag(s$d[1:r]) %*% t(s$v))
```



## (예 4.6 - 4.7)

#### 패키지 로드
주성분 스코어

```{r}
library(Matrix)
x %*% s$v
```

#### 데이터 로드

```{r}
dat1 <- read.csv(file = "data/ch4_dat1.csv")
x <- as.matrix(dat1)
```
#### (예 4.8)

#### (예 4.6)

행렬 차원
제곱합-교차곱 행렬의 고유치 및 고유벡터

```{r}
n <- dim(x)[1]
k <- dim(x)[2]
eigen(t(x) %*% x)
```

특이치 분해

```{r}
s <- svd(x, nu = n, nv = k)
diag(s$d, nrow = n, ncol = k)
s$u
s$v
```


#### (예 4.7)

행렬 rank

```{r}
r <- rankMatrix(x)
```

특이치 분해

```{r}
s <- svd(x, nu = r, nv = r)
diag(s$d[1:r])
s$u
s$v
```


## (예 4.12)
## (예 4.10)

#### 데이터 로드

Expand Down Expand Up @@ -199,7 +108,7 @@ barplot(rate_var,
```


## (예 4.14)
## (예 4.12)

#### 데이터 로드

Expand Down Expand Up @@ -247,7 +156,7 @@ anova(lm_fit)
```


## (예 4.16 - 4.17)
## (예 4.14 - 4.15)

#### 패키지 로드

Expand All @@ -261,7 +170,7 @@ library(pls)
dat3 <- read.csv(file = "data/ch4_dat3.csv")
```

#### (예 4.16)
#### (예 4.14)

PLS 모형 추정

Expand Down Expand Up @@ -300,7 +209,7 @@ coef(pls_fit, intercept = TRUE)
```


#### (예 4.17)
#### (예 4.15)

학습데이터 생성

Expand Down
71 changes: 20 additions & 51 deletions final/ch04_ex05_svd.R
Original file line number Diff line number Diff line change
@@ -1,63 +1,32 @@
# ch04_ex05_svd.R
# ch4.3 Matrix decomposition

# load package
library(Matrix)

# load data
# centered matrix
x <- matrix(
c(
1, 2, 3, 2,
-1, 0, -1, -1,
0, -2, -2, -1
),
nrow = 3,
byrow = TRUE
)

print(x)
# ch4.4 Principal component score

# ex4-5

# dimension of x
n <- dim(x)[1]
k <- dim(x)[2]
# load data
dat1 <- read.csv(file = "data/ch4_dat1.csv")
dat1
# define as matrix
x <- as.matrix(dat1)

# singular value decomposition of x
s <- svd(x, nu = n, nv = k)
diag(s$d, nrow = n, ncol = k)
s <- svd(x)
diag(s$d)
s$u
s$v

# verify Theorem 4.8
all.equal(x, s$u %*% diag(s$d, nrow = n, ncol = k) %*% t(s$v))

# singular value decomposition of x'x
xtx <- t(x) %*% x # or `crossprod(x)`
print(xtx)
n_xtx <- dim(xtx)[1]
k_xtx <- dim(xtx)[2]
s_xtx <- svd(xtx, nu = n_xtx, nv = k_xtx)
diag(s_xtx$d, nrow = n_xtx, ncol = k_xtx)
s_xtx$u
s_xtx$v

# verify that SVD produces eigenvalue and eigenvector
xtx %*% s_xtx$v[, 2]
s_xtx$v[, 2] * s_xtx$d[2]

# ex4-6
# covariance matrix of x
cov(x)
svd(cov(x))
# correlation matrix of x
cor(x)
svd(cor(x))

# ex4-7
# principal component T=xp
x %*% s$v

# rank of matrix x
r <- rankMatrix(x)

# singular value decomposition of x
s <- svd(x, nu = r, nv = r)
diag(s$d[1:r])
s$u
s$v

# verify Theorem 4.9
all.equal(x, s$u %*% diag(s$d[1:r]) %*% t(s$v))
# ex4-8
# eigenvalue & eigenvector of x'x
eigen(t(x) %*% x)
35 changes: 0 additions & 35 deletions final/ch04_ex06_svd.R

This file was deleted.

4 changes: 2 additions & 2 deletions revised/ch04_ex12_pca.R → final/ch04_ex10_pca.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ch04_ex12_pca.R
# ch04_ex10_pca.R
# ch4.5 principal component - proportion of variance

# ex4.12
# ex4.10

# load data
# specify `fileEncoding` argument if needed
Expand Down
15 changes: 9 additions & 6 deletions final/ch04_ex14_pcr.R → final/ch04_ex12_pcr.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ch04_ex14_pcr.R
# ch04_ex12_pcr.R
# ch4.7 Principle Component Regression

# ex4-14
# ex4-12

# read csv file, centered data
dat3 <- read.csv(file = "data/ch4_dat3.csv")
Expand All @@ -10,17 +10,21 @@ dat3
# covert data frame to matrix
x <- as.matrix(dat3[, 1:3])


# PCA with centering and without scaling
pca_fit <- prcomp(x, center = TRUE, scale. = FALSE)
pca_fit <- prcomp(dat3[, 1:3], center = TRUE, scale. = FALSE)
pca_fit

# eigenvalue of cov(x)
pca_var <- pca_fit$sdev^2
pca_var

# same as above : eigenvalue of cov(x)
x <- as.matrix(dat3[, 1:3])
cov(x)
svd(cov(x))

# principal component score
PRC <- predict(pca_fit, x)
PRC <- predict(pca_fit, dat3[, 1:3])
PRC

# create training data with components
Expand All @@ -30,4 +34,3 @@ dat4 <- as.data.frame(cbind(PRC, y = dat3$y))
lm_fit <- lm(y ~ PC1 + PC2, data = dat4)
summary(lm_fit)
anova(lm_fit)

Loading

0 comments on commit 086396a

Please sign in to comment.