Upload the R packages needed to illustate
library("MVA")
## Loading required package: HSAUR2
## Loading required package: tools
set.seed(280875)
library("lattice")
lattice.options(default.theme =
function()
standard.theme("pdf", color = FALSE))
The data in this example consist of eight blood chemistry variables measured on 72 patients in a clinical trial
bc <- c(
0.290,
0.202, 0.415,
-0.055, 0.285, 0.419,
-0.105, -0.376, -0.521, -0.877,
-0.252, -0.349, -0.441, -0.076, 0.206,
-0.229, -0.164, -0.145, 0.023, 0.034, 0.192,
0.058, -0.129, -0.076, -0.131, 0.151, 0.077, 0.423)
blood_sd <- c(rblood = 0.371, plate = 41.253, wblood = 1.935,
neut = 0.077, lymph = 0.071, bilir = 4.037,
sodium = 2.732, potass = 0.297)
blood_corr <- diag(length(blood_sd)) / 2
blood_corr[upper.tri(blood_corr)] <- bc
blood_corr <- blood_corr + t(blood_corr)
blood_cov <- blood_corr * outer(blood_sd, blood_sd, "*")
blood_corr
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 1.000 0.290 0.202 -0.055 -0.105 -0.252 -0.229 0.058
## [2,] 0.290 1.000 0.415 0.285 -0.376 -0.349 -0.164 -0.129
## [3,] 0.202 0.415 1.000 0.419 -0.521 -0.441 -0.145 -0.076
## [4,] -0.055 0.285 0.419 1.000 -0.877 -0.076 0.023 -0.131
## [5,] -0.105 -0.376 -0.521 -0.877 1.000 0.206 0.034 0.151
## [6,] -0.252 -0.349 -0.441 -0.076 0.206 1.000 0.192 0.077
## [7,] -0.229 -0.164 -0.145 0.023 0.034 0.192 1.000 0.423
## [8,] 0.058 -0.129 -0.076 -0.131 0.151 0.077 0.423 1.000
blood_sd
## rblood plate wblood neut lymph bilir sodium potass
## 0.371 41.253 1.935 0.077 0.071 4.037 2.732 0.297
blood_pcacov <- princomp(covmat = blood_cov)
summary(blood_pcacov, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 41.2877486 3.880212624 2.64197339 1.624583979
## Proportion of Variance 0.9856182 0.008705172 0.00403574 0.001525986
## Cumulative Proportion 0.9856182 0.994323381 0.99835912 0.999885108
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.353951757 2.561722e-01 8.510631e-02 2.372715e-02
## Proportion of Variance 0.000072436 3.794288e-05 4.187837e-06 3.255049e-07
## Cumulative Proportion 0.999957544 9.999955e-01 9.999997e-01 1.000000e+00
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## rblood 0.943 0.329
## plate 0.999
## wblood 0.192 0.981
## neut 0.758 0.650
## lymph -0.649 0.760
## bilir -0.961 0.195 0.191
## sodium -0.193 -0.979
## potass 0.329 -0.942
blood_pcacor <- princomp(covmat = blood_corr)
summary(blood_pcacor, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.6710100 1.2375848 1.1177138 0.88227419 0.78839505
## Proportion of Variance 0.3490343 0.1914520 0.1561605 0.09730097 0.07769584
## Cumulative Proportion 0.3490343 0.5404863 0.6966468 0.79394778 0.87164363
## Comp.6 Comp.7 Comp.8
## Standard deviation 0.69917350 0.66002394 0.31996216
## Proportion of Variance 0.06110545 0.05445395 0.01279697
## Cumulative Proportion 0.93274908 0.98720303 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## [1,] 0.194 0.417 0.400 0.652 0.175 0.363 0.176 0.102
## [2,] 0.400 0.154 0.168 -0.848 -0.230 -0.110
## [3,] 0.459 0.168 -0.274 0.251 -0.403 0.677
## [4,] 0.430 -0.472 -0.171 0.169 0.118 -0.237 0.678
## [5,] -0.494 0.360 -0.180 -0.139 -0.136 0.157 0.724
## [6,] -0.319 -0.320 -0.277 0.633 -0.162 -0.384 0.377
## [7,] -0.177 -0.535 0.410 -0.163 -0.299 0.513 0.367
## [8,] -0.171 -0.245 0.709 0.198 -0.469 -0.376
plot(blood_pcacor$sdev^2, xlab = "Component number",
ylab = "Component variance", type = "l", main = "Scree diagram")
plot(log(blood_pcacor$sdev^2), xlab = "Component number",
ylab = "log(Component variance)", type="l",
main = "Log(eigenvalue) diagram")
The data give the head lengths and head breadths (in millimetres) for each of the first two adult sons in 25 families.
"headsize" <-
matrix(c(191, 195, 181, 183, 176, 208, 189, 197, 188, 192, 179, 183, 174, 190, 188, 163, 195, 186, 181, 175, 192, 174,
176, 197, 190, 155, 149, 148, 153, 144, 157, 150, 159, 152, 150, 158, 147, 150, 159, 151, 137, 155, 153,
145, 140, 154, 143, 139, 167, 163, 179, 201, 185, 188, 171, 192, 190, 189, 197, 187, 186, 174, 185, 195,
187, 161, 183, 173, 182, 165, 185, 178, 176, 200, 187, 145, 152, 149, 149, 142, 152, 149, 152, 159, 151,
148, 147, 152, 157, 158, 130, 158, 148, 146, 137, 152, 147, 143, 158, 150),
nrow = 25, ncol = 4
, dimnames = list(character(0)
, c("head1", "breadth1", "head2", "breadth2")))
x <- headsize
headsize <- as.data.frame(headsize)
toLatex(HSAURtable(headsize), pcol = 2,
caption = "Head Size Data.",
label = "ch:PCA:headsize:tab", rownames = FALSE)
## \index{headsize data@\Robject{headsize} data}
## \begin{center}
## \begin{longtable}{ rrrr|rrrr }
## \caption{\Robject{headsize} data. Head Size Data. \label{ch:PCA:headsize:tab}}
## \\
## \hline
## \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} & \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} \\ \hline
## \endfirsthead
## \caption[]{\Robject{headsize} data (continued).} \\
## \hline
## \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} & \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} \\ \hline
## \endhead
## 191 & 155 & 179 & 145 & 190 & 159 & 195 & 157 \\
## 195 & 149 & 201 & 152 & 188 & 151 & 187 & 158 \\
## 181 & 148 & 185 & 149 & 163 & 137 & 161 & 130 \\
## 183 & 153 & 188 & 149 & 195 & 155 & 183 & 158 \\
## 176 & 144 & 171 & 142 & 186 & 153 & 173 & 148 \\
## 208 & 157 & 192 & 152 & 181 & 145 & 182 & 146 \\
## 189 & 150 & 190 & 149 & 175 & 140 & 165 & 137 \\
## 197 & 159 & 189 & 152 & 192 & 154 & 185 & 152 \\
## 188 & 152 & 197 & 159 & 174 & 143 & 178 & 147 \\
## 192 & 150 & 187 & 151 & 176 & 139 & 176 & 143 \\
## 179 & 158 & 186 & 148 & 197 & 167 & 200 & 158 \\
## 183 & 147 & 174 & 147 & 190 & 163 & 187 & 150 \\
## 174 & 150 & 185 & 152 & & & & \\
## \hline
## \end{longtable}
## \end{center}
headsize <- x
head_dat <- headsize[, c("head1", "head2")]
colMeans(head_dat)
## head1 head2
## 185.72 183.84
head_pca <- princomp(x = head_dat)
head_pca
## Call:
## princomp(x = head_dat)
##
## Standard deviations:
## Comp.1 Comp.2
## 12.690766 5.215406
##
## 2 variables and 25 observations.
print(summary(head_pca), loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2
## Standard deviation 12.6907660 5.2154059
## Proportion of Variance 0.8555135 0.1444865
## Cumulative Proportion 0.8555135 1.0000000
##
## Loadings:
## Comp.1 Comp.2
## head1 0.693 0.721
## head2 0.721 -0.693
s1 <- round(diag(cov(head_pca$scores))[1], 3)
s2 <- round(diag(cov(head_pca$scores))[2], 3)
s <- summary(head_pca)
l1 <- round(s$loadings[,1], 2)
l2 <- round(s$loadings[,2], 2)
diag(cov(head_pca$scores))
## Comp.1 Comp.2
## 167.76619 28.33381
a1 <- 183.84-0.721*185.72/0.693
b1 <- 0.721/0.693
a2 <- 183.84-(-0.693*185.72/0.721)
b2 <- -0.693/0.721
plot(head_dat, xlab = "First son's head length (mm)",
ylab = "Second son's head length")
abline(a1, b1)
abline(a2, b2, lty = 2)
xlim <- range(head_pca$scores[,1])
plot(head_pca$scores, xlim = xlim, ylim = xlim)
The pentathlon for women was first held in Germany in 1928. Initially this consisted of the shot put, long jump, 100 m, high jump, and javelin events, held over two days. In the 1964 Olympic Games, the pentathlon became the first combined Olympic event for women, consisting now of the 80 m hurdles, shot, high jump, long jump, and 200 m. In 1977, the 200 m was replaced by the 800 m run, and from 1981 the IAAF brought in the seven-event heptathlon in place of the pentathlon, with day one containing the events 100 m hurdles, shot, high jump, and 200 m run, and day two the long jump, javelin, and 800 m run. A scoring system is used to assign points to the results from each event, and the winner is the woman who accumulates the most points over the two days. The event made its first Olympic appearance in 1984.
In the 1988 Olympics held in Seoul, the heptathlon was won by one of the stars of women’s athletics in the USA, Jackie Joyner-Kersee. The results for all 25 competitors in all seven disciplines are given from Hand, Daly, Lunn, McConway, and Ostrowski 1994.
data("heptathlon",package="HSAUR2")
toLatex(HSAURtable(heptathlon), pcol = 1,
caption = "Results of Olympic heptathlon, Seoul, 1988.",
label = "ch:PCA:heptathlon:tab",
rownames = TRUE)
## \index{heptathlon data@\Robject{heptathlon} data}
## \begin{center}
## \begin{longtable}{l rrrrrrrr }
## \caption{\Robject{heptathlon} data. Results of Olympic heptathlon, Seoul, 1988. \label{ch:PCA:heptathlon:tab}}
## \\
## \hline
## & \Robject{hurdles} & \Robject{highjump} & \Robject{shot} & \Robject{run200m} & \Robject{longjump} & \Robject{javelin} & \Robject{run800m} & \Robject{score} \\ \hline
## \endfirsthead
## \caption[]{\Robject{heptathlon} data (continued).} \\
## \hline
## & \Robject{hurdles} & \Robject{highjump} & \Robject{shot} & \Robject{run200m} & \Robject{longjump} & \Robject{javelin} & \Robject{run800m} & \Robject{score} \\ \hline
## \endhead
## Joyner-Kersee (USA) & 12.69 & 1.86 & 15.80 & 22.56 & 7.27 & 45.66 & 128.51 & 7291 \\
## John (GDR) & 12.85 & 1.80 & 16.23 & 23.65 & 6.71 & 42.56 & 126.12 & 6897 \\
## Behmer (GDR) & 13.20 & 1.83 & 14.20 & 23.10 & 6.68 & 44.54 & 124.20 & 6858 \\
## Sablovskaite (URS) & 13.61 & 1.80 & 15.23 & 23.92 & 6.25 & 42.78 & 132.24 & 6540 \\
## Choubenkova (URS) & 13.51 & 1.74 & 14.76 & 23.93 & 6.32 & 47.46 & 127.90 & 6540 \\
## Schulz (GDR) & 13.75 & 1.83 & 13.50 & 24.65 & 6.33 & 42.82 & 125.79 & 6411 \\
## Fleming (AUS) & 13.38 & 1.80 & 12.88 & 23.59 & 6.37 & 40.28 & 132.54 & 6351 \\
## Greiner (USA) & 13.55 & 1.80 & 14.13 & 24.48 & 6.47 & 38.00 & 133.65 & 6297 \\
## Lajbnerova (CZE) & 13.63 & 1.83 & 14.28 & 24.86 & 6.11 & 42.20 & 136.05 & 6252 \\
## Bouraga (URS) & 13.25 & 1.77 & 12.62 & 23.59 & 6.28 & 39.06 & 134.74 & 6252 \\
## Wijnsma (HOL) & 13.75 & 1.86 & 13.01 & 25.03 & 6.34 & 37.86 & 131.49 & 6205 \\
## Dimitrova (BUL) & 13.24 & 1.80 & 12.88 & 23.59 & 6.37 & 40.28 & 132.54 & 6171 \\
## Scheider (SWI) & 13.85 & 1.86 & 11.58 & 24.87 & 6.05 & 47.50 & 134.93 & 6137 \\
## Braun (FRG) & 13.71 & 1.83 & 13.16 & 24.78 & 6.12 & 44.58 & 142.82 & 6109 \\
## Ruotsalainen (FIN) & 13.79 & 1.80 & 12.32 & 24.61 & 6.08 & 45.44 & 137.06 & 6101 \\
## Yuping (CHN) & 13.93 & 1.86 & 14.21 & 25.00 & 6.40 & 38.60 & 146.67 & 6087 \\
## Hagger (GB) & 13.47 & 1.80 & 12.75 & 25.47 & 6.34 & 35.76 & 138.48 & 5975 \\
## Brown (USA) & 14.07 & 1.83 & 12.69 & 24.83 & 6.13 & 44.34 & 146.43 & 5972 \\
## Mulliner (GB) & 14.39 & 1.71 & 12.68 & 24.92 & 6.10 & 37.76 & 138.02 & 5746 \\
## Hautenauve (BEL) & 14.04 & 1.77 & 11.81 & 25.61 & 5.99 & 35.68 & 133.90 & 5734 \\
## Kytola (FIN) & 14.31 & 1.77 & 11.66 & 25.69 & 5.75 & 39.48 & 133.35 & 5686 \\
## Geremias (BRA) & 14.23 & 1.71 & 12.95 & 25.50 & 5.50 & 39.64 & 144.02 & 5508 \\
## Hui-Ing (TAI) & 14.85 & 1.68 & 10.00 & 25.23 & 5.47 & 39.14 & 137.30 & 5290 \\
## Jeong-Mi (KOR) & 14.53 & 1.71 & 10.83 & 26.61 & 5.50 & 39.26 & 139.17 & 5289 \\
## Launa (PNG) & 16.42 & 1.50 & 11.78 & 26.16 & 4.88 & 46.38 & 163.43 & 4566 \\
## \hline
## \end{longtable}
## \end{center}
heptathlon$hurdles <- with(heptathlon, max(hurdles)-hurdles)
heptathlon$run200m <- with(heptathlon, max(run200m)-run200m)
heptathlon$run800m <- with(heptathlon, max(run800m)-run800m)
score <- which(colnames(heptathlon) == "score")
round(cor(heptathlon[,-score]), 2)
## hurdles highjump shot run200m longjump javelin run800m
## hurdles 1.00 0.81 0.65 0.77 0.91 0.01 0.78
## highjump 0.81 1.00 0.44 0.49 0.78 0.00 0.59
## shot 0.65 0.44 1.00 0.68 0.74 0.27 0.42
## run200m 0.77 0.49 0.68 1.00 0.82 0.33 0.62
## longjump 0.91 0.78 0.74 0.82 1.00 0.07 0.70
## javelin 0.01 0.00 0.27 0.33 0.07 1.00 -0.02
## run800m 0.78 0.59 0.42 0.62 0.70 -0.02 1.00
plot(heptathlon[,-score])
plot(heptathlon[,-score], pch = ".", cex = 1.5)
heptathlon <- heptathlon[-grep("PNG", rownames(heptathlon)),]
score <- which(colnames(heptathlon) == "score")
round(cor(heptathlon[,-score]), 2)
## hurdles highjump shot run200m longjump javelin run800m
## hurdles 1.00 0.58 0.77 0.83 0.89 0.33 0.56
## highjump 0.58 1.00 0.46 0.39 0.66 0.35 0.15
## shot 0.77 0.46 1.00 0.67 0.78 0.34 0.41
## run200m 0.83 0.39 0.67 1.00 0.81 0.47 0.57
## longjump 0.89 0.66 0.78 0.81 1.00 0.29 0.52
## javelin 0.33 0.35 0.34 0.47 0.29 1.00 0.26
## run800m 0.56 0.15 0.41 0.57 0.52 0.26 1.00
plot(heptathlon[,-score], pch = ".", cex = 1.5)
op <- options(digits = 2)
heptathlon_pca <- prcomp(heptathlon[, -score], scale = TRUE)
print(heptathlon_pca)
## Standard deviations (1, .., p=7):
## [1] 2.08 0.95 0.91 0.68 0.55 0.34 0.26
##
## Rotation (n x k) = (7 x 7):
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## hurdles -0.45 0.058 -0.17 0.048 -0.199 0.847 -0.070
## highjump -0.31 -0.651 -0.21 -0.557 0.071 -0.090 0.332
## shot -0.40 -0.022 -0.15 0.548 0.672 -0.099 0.229
## run200m -0.43 0.185 0.13 0.231 -0.618 -0.333 0.470
## longjump -0.45 -0.025 -0.27 -0.015 -0.122 -0.383 -0.749
## javelin -0.24 -0.326 0.88 0.060 0.079 0.072 -0.211
## run800m -0.30 0.657 0.19 -0.574 0.319 -0.052 0.077
a1 <- heptathlon_pca$rotation[,1]
center <- heptathlon_pca$center
scale <- heptathlon_pca$scale
hm <- as.matrix(heptathlon[,-score])
drop(scale(hm, center = center, scale = scale) %*%
heptathlon_pca$rotation[,1])
## Joyner-Kersee (USA) John (GDR) Behmer (GDR) Sablovskaite (URS)
## -4.758 -3.148 -2.926 -1.288
## Choubenkova (URS) Schulz (GDR) Fleming (AUS) Greiner (USA)
## -1.503 -0.958 -0.953 -0.633
## Lajbnerova (CZE) Bouraga (URS) Wijnsma (HOL) Dimitrova (BUL)
## -0.382 -0.522 -0.218 -1.076
## Scheider (SWI) Braun (FRG) Ruotsalainen (FIN) Yuping (CHN)
## 0.003 0.109 0.209 0.233
## Hagger (GB) Brown (USA) Mulliner (GB) Hautenauve (BEL)
## 0.660 0.757 1.881 1.828
## Kytola (FIN) Geremias (BRA) Hui-Ing (TAI) Jeong-Mi (KOR)
## 2.118 2.771 3.901 3.897
predict(heptathlon_pca)[,1]
## Joyner-Kersee (USA) John (GDR) Behmer (GDR) Sablovskaite (URS)
## -4.758 -3.148 -2.926 -1.288
## Choubenkova (URS) Schulz (GDR) Fleming (AUS) Greiner (USA)
## -1.503 -0.958 -0.953 -0.633
## Lajbnerova (CZE) Bouraga (URS) Wijnsma (HOL) Dimitrova (BUL)
## -0.382 -0.522 -0.218 -1.076
## Scheider (SWI) Braun (FRG) Ruotsalainen (FIN) Yuping (CHN)
## 0.003 0.109 0.209 0.233
## Hagger (GB) Brown (USA) Mulliner (GB) Hautenauve (BEL)
## 0.660 0.757 1.881 1.828
## Kytola (FIN) Geremias (BRA) Hui-Ing (TAI) Jeong-Mi (KOR)
## 2.118 2.771 3.901 3.897
sdev <- heptathlon_pca$sdev
prop12 <- round(sum(sdev[1:2]^2)/sum(sdev^2)*100, 0)
plot(heptathlon_pca)
plot(heptathlon_pca, main = "")
cor(heptathlon$score, heptathlon_pca$x[,1])
## [1] -0.99
plot(heptathlon$score, heptathlon_pca$x[,1])
The air pollution data were originally collected to investigate the determinants of pol- lution, presumably by regressing SO2 on the six other variables.
data("USairpollution", package = "HSAUR2")
panel.hist <- function(x, ...) {
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="grey", ...)
}
USairpollution$negtemp <- USairpollution$temp * (-1)
USairpollution$temp <- NULL
pairs(USairpollution[,-1], diag.panel = panel.hist,
pch = ".", cex = 1.5)
cor(USairpollution[,-1])
## manu popul wind precip predays negtemp
## manu 1.000 0.955 0.238 -0.032 0.132 0.190
## popul 0.955 1.000 0.213 -0.026 0.042 0.063
## wind 0.238 0.213 1.000 -0.013 0.164 0.350
## precip -0.032 -0.026 -0.013 1.000 0.496 -0.386
## predays 0.132 0.042 0.164 0.496 1.000 0.430
## negtemp 0.190 0.063 0.350 -0.386 0.430 1.000
usair_pca <- princomp(USairpollution[,-1], cor = TRUE)
summary(usair_pca, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Standard deviation 1.48 1.22 1.18 0.87 0.338 0.1856
## Proportion of Variance 0.37 0.25 0.23 0.13 0.019 0.0057
## Cumulative Proportion 0.37 0.62 0.85 0.98 0.994 1.0000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## manu 0.612 0.168 0.273 0.137 0.102 0.703
## popul 0.578 0.222 0.350 -0.695
## wind 0.354 -0.131 -0.297 -0.869 -0.113
## precip -0.623 0.505 -0.171 0.568
## predays 0.238 -0.708 0.311 -0.580
## negtemp 0.330 -0.128 -0.672 0.306 0.558 -0.136
pairs(usair_pca$scores[,1:3], ylim = c(-6, 4), xlim = c(-6, 4),
panel = function(x,y, ...) {
text(x, y, abbreviate(row.names(USairpollution)),
cex = 0.6)
bvbox(cbind(x,y), add = TRUE)
})
out <- sapply(1:6, function(i) {
plot(USairpollution$SO2,usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
usair_reg <- lm(SO2 ~ usair_pca$scores,
data = USairpollution)
summary(usair_reg)
##
## Call:
## lm(formula = SO2 ~ usair_pca$scores, data = USairpollution)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.00 -8.54 -0.99 5.76 48.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.049 2.286 13.15 6.9e-15 ***
## usair_pca$scoresComp.1 9.942 1.542 6.45 2.3e-07 ***
## usair_pca$scoresComp.2 -2.240 1.866 -1.20 0.2384
## usair_pca$scoresComp.3 0.375 1.936 0.19 0.8475
## usair_pca$scoresComp.4 8.549 2.622 3.26 0.0025 **
## usair_pca$scoresComp.5 15.176 6.753 2.25 0.0312 *
## usair_pca$scoresComp.6 39.271 12.316 3.19 0.0031 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15 on 34 degrees of freedom
## Multiple R-squared: 0.67, Adjusted R-squared: 0.611
## F-statistic: 11.5 on 6 and 34 DF, p-value: 5.42e-07
biplot(heptathlon_pca, col = c("gray", "black"))
tmp <- heptathlon[, -score]
rownames(tmp) <- abbreviate(gsub(" \\(.*", "", rownames(tmp)))
biplot(prcomp(tmp, scale = TRUE), col = c("black", "darkgray"), xlim =
+ c(-0.5, 0.7), cex = 0.7)
headsize.std <- sweep(headsize, 2,
apply(headsize, 2, sd), FUN = "/")
R <- cor(headsize.std)
r11 <- R[1:2, 1:2]
r22 <- R[-(1:2), -(1:2)]
r12 <- R[1:2, -(1:2)]
r21 <- R[-(1:2), 1:2]
(E1 <- solve(r11) %*% r12 %*% solve(r22) %*%r21)
## head1 breadth1
## head1 0.32 0.32
## breadth1 0.30 0.30
(E2 <- solve(r22) %*% r21 %*% solve(r11) %*%r12)
## head2 breadth2
## head2 0.30 0.30
## breadth2 0.32 0.32
(e1 <- eigen(E1))
## eigen() decomposition
## $values
## [1] 0.6217 0.0029
##
## $vectors
## [,1] [,2]
## [1,] 0.73 -0.70
## [2,] 0.69 0.71
(e2 <- eigen(E2))
## eigen() decomposition
## $values
## [1] 0.6217 0.0029
##
## $vectors
## [,1] [,2]
## [1,] -0.68 -0.71
## [2,] -0.73 0.71
girth1 <- headsize.std[,1:2] %*% e1$vectors[,1]
girth2 <- headsize.std[,3:4] %*% e2$vectors[,1]
shape1 <- headsize.std[,1:2] %*% e1$vectors[,2]
shape2 <- headsize.std[,3:4] %*% e2$vectors[,2]
(g <- cor(girth1, girth2))
## [,1]
## [1,] -0.79
(s <- cor(shape1, shape2))
## [,1]
## [1,] 0.054
plot(girth1, girth2)
plot(shape1, shape2)
The data for this example arise from a study of depression amongst 294 respondents in Los An- geles. The two sets of variables of interest were “health variables”, namely the CESD (the sum of 20 separate numerical scales measuring different aspects of depression) and a measure of general health and “personal” variables, of which there were four: gender, age, income, and educational level (numerically coded from the lowest “less than high school”, to the highest, “finished doctorate”).
depr <- c(
0.212,
0.124, 0.098,
-0.164, 0.308, 0.044,
-0.101, -0.207, -0.106, -0.208,
-0.158, -0.183, -0.180, -0.192, 0.492)
LAdepr <- diag(6) / 2
LAdepr[upper.tri(LAdepr)] <- depr
LAdepr <- LAdepr + t(LAdepr)
rownames(LAdepr) <- colnames(LAdepr) <- c("CESD", "Health", "Gender", "Age", "Edu", "Income")
x <- LAdepr
LAdepr <- as.data.frame(LAdepr)
toLatex(HSAURtable(LAdepr),
caption = "Los Angeles Depression Data.",
label = "ch:PCA:LAdepr:tab", rownames = FALSE)
## \index{LAdepr data@\Robject{LAdepr} data}
## \begin{center}
## \begin{longtable}{ rrrrrr }
## \caption{\Robject{LAdepr} data. Los Angeles Depression Data. \label{ch:PCA:LAdepr:tab}}
## \\
## \hline
## \Robject{CESD} & \Robject{Health} & \Robject{Gender} & \Robject{Age} & \Robject{Edu} & \Robject{Income} \\ \hline
## \endfirsthead
## \caption[]{\Robject{LAdepr} data (continued).} \\
## \hline
## \Robject{CESD} & \Robject{Health} & \Robject{Gender} & \Robject{Age} & \Robject{Edu} & \Robject{Income} \\ \hline
## \endhead
## 1.000 & 0.212 & 0.124 & -0.164 & -0.101 & -0.158 \\
## 0.212 & 1.000 & 0.098 & 0.308 & -0.207 & -0.183 \\
## 0.124 & 0.098 & 1.000 & 0.044 & -0.106 & -0.180 \\
## -0.164 & 0.308 & 0.044 & 1.000 & -0.208 & -0.192 \\
## -0.101 & -0.207 & -0.106 & -0.208 & 1.000 & 0.492 \\
## -0.158 & -0.183 & -0.180 & -0.192 & 0.492 & 1.000 \\
## \hline
## \end{longtable}
## \end{center}
LAdepr <- x
r11 <- LAdepr[1:2, 1:2]
r22 <- LAdepr[-(1:2), -(1:2)]
r12 <- LAdepr[1:2, -(1:2)]
r21 <- LAdepr[-(1:2), 1:2]
(E1 <- solve(r11) %*% r12 %*% solve(r22) %*%r21)
## CESD Health
## CESD 0.084 -0.043
## Health -0.033 0.133
(E2 <- solve(r22) %*% r21 %*% solve(r11) %*%r12)
## Gender Age Edu Income
## Gender 0.0155 -0.0015 -0.018 -0.022
## Age -0.0024 0.1471 -0.040 -0.016
## Edu -0.0149 -0.0260 0.025 0.025
## Income -0.0212 0.0130 0.022 0.029
(e1 <- eigen(E1))
## eigen() decomposition
## $values
## [1] 0.153 0.063
##
## $vectors
## [,1] [,2]
## [1,] 0.53 -0.91
## [2,] -0.85 -0.42
(e2 <- eigen(E2))
## eigen() decomposition
## $values
## [1] 1.5e-01 6.3e-02 -5.2e-18 2.9e-18
##
## $vectors
## [,1] [,2] [,3] [,4]
## [1,] 0.0026 0.49 0.15 -0.85
## [2,] 0.9801 -0.32 -0.11 -0.16
## [3,] -0.1858 -0.43 -0.70 -0.47
## [4,] 0.0699 -0.69 0.69 -0.19