# December 3, 2024


library(glmnet)

### Example 1

data(QuickStartExample)
x <- QuickStartExample$x
y <- QuickStartExample$y
dim(x)

fit <- glmnet(x, y)
plot(fit)
print(fit)

coef(fit,s=0.1)

set.seed(29)
nx <- matrix(rnorm(5 * 20), 5, 20)
predict(fit, newx = nx, s = c(0.1, 0.05))

cvfit <- cv.glmnet(x, y)
plot(cvfit)
cvfit$lambda.min
coef(cvfit, s = "lambda.min")
predict(cvfit, newx = x[1:5,], s = "lambda.min")

coef(cvfit, s = "lambda.min")!=0
(coef(cvfit, s = "lambda.min")!=0)[-1]
summary(lm(y~x[,(coef(cvfit, s = "lambda.min")!=0)[-1]]))

as.numeric(coef(lm(y~x[,(coef(cvfit, s = "lambda.min")!=0)[-1]])))
as.numeric(coef(cvfit, s = "lambda.min"))

plot(cvfit)

cvfit$lambda.1se
coef(cvfit, s = "lambda.1se")
summary(lm(y~x[,(coef(cvfit, s = "lambda.1se")!=0)[-1]]))

as.numeric(coef(lm(y~x[,(coef(cvfit, s = "lambda.1se")!=0)[-1]])))
as.numeric(coef(cvfit, s = "lambda.1se"))


### Example 2

fit0 <- glmnet(x, y, alpha=0)
plot(fit0)
plot(fit0, xvar = "lambda", label = TRUE)
print(fit0)

cvfit0 <- cv.glmnet(x, y, alpha=0)
plot(cvfit0)
cvfit0$lambda.min
coef(cvfit0, s = "lambda.min")
predict(cvfit0, newx = x[1:5,], s = "lambda.min")


### Example 3

wts <-  c(rep(1,50), rep(2,50))
fit <- glmnet(x, y, alpha = 0.2, weights = wts, nlambda = 20)

print(fit)

plot(fit, xvar = "lambda", label = TRUE)
plot(fit, xvar = "dev", label = TRUE)


### Example 4

fit <- glmnet(x, y)
any(fit$lambda == 0.5)  # 0.5 not in original lambda sequence

coef.apprx <- coef(fit, s = 0.5, exact = FALSE)
coef.exact <- coef(fit, s = 0.5, exact = TRUE, x=x, y=y)
cbind2(coef.exact[which(coef.exact != 0)], 
       coef.apprx[which(coef.apprx != 0)])

cbind2(coef.exact, coef.apprx)

predict(fit, newx = x[1:5,], type = "response", s = 0.05)

plot(fit, xvar = "lambda", label = TRUE)
plot(fit, xvar = "dev", label = TRUE)


### Example 5

cvfit <- cv.glmnet(x, y, type.measure = "mse", nfolds = 20)
print(cvfit)

# # Parellel Computing
# library(doMC)
# registerDoMC(cores = 2)
# X <- matrix(rnorm(1e4 * 200), 1e4, 200)
# Y <- rnorm(1e4)
# system.time(cv.glmnet(X, Y))
# system.time(cv.glmnet(X, Y, parallel = TRUE))

cvfit$lambda.min
predict(cvfit, newx = x[1:5,], s = "lambda.min")
coef(cvfit, s = "lambda.min")

foldid <- sample(1:10, size = length(y), replace = TRUE)
cv1  <- cv.glmnet(x, y, foldid = foldid, alpha = 1)
cv.5 <- cv.glmnet(x, y, foldid = foldid, alpha = 0.5)
cv0  <- cv.glmnet(x, y, foldid = foldid, alpha = 0)

par(mfrow = c(2,2))
plot(cv1); plot(cv.5); plot(cv0)
plot(log(cv1$lambda)   , cv1$cvm , pch = 19, col = "red",
     xlab = "log(Lambda)", ylab = cv1$name)
points(log(cv.5$lambda), cv.5$cvm, pch = 19, col = "grey")
points(log(cv0$lambda) , cv0$cvm , pch = 19, col = "blue")
legend("topleft", legend = c("alpha= 1", "alpha= .5", "alpha 0"),
       pch = 19, col = c("red","grey","blue"))


### Example 6

tfit <- glmnet(x, y, lower.limits = -0.7, upper.limits = 0.5)
plot(tfit)

p.fac <- rep(1, 20)
p.fac[c(1, 3, 5)] <- 0
pfit <- glmnet(x, y, penalty.factor = p.fac)
plot(pfit, label = TRUE)


### Example 7
### Logistic

data(BinomialExample)
x <- BinomialExample$x
y <- BinomialExample$y
dim(x)

fit <- glmnet(x, y, family = "binomial")
plot(fit, xvar='dev')

predict(fit, newx = x[1:5,], type = "class", s = c(0.05, 0.01))

cvfit <- cv.glmnet(x, y, family = "binomial", type.measure = "class")
plot(cvfit)

cvfit$lambda.min
cvfit$lambda.1se

cv.glmnet(x, y, family = "binomial")
cv.glmnet(x, y, family = "binomial", type.measure = "deviance")
cvfit

cvfit<-cv.glmnet(x, y, family = "binomial")

coef(cvfit, s = "lambda.min")
coef(cvfit, s = "lambda.1se")


### Example 8
### Poisson

data(PoissonExample)
x <- PoissonExample$x
y <- PoissonExample$y

fit <- glmnet(x, y, family = "poisson")
plot(fit)

coef(fit, s = 1)
predict(fit, newx = x[1:5,], type = "response", s = c(0.1,1))

cvfit <- cv.glmnet(x, y, family = "poisson")


### Example 9
### PCA

?prcomp

USArrests

prcomp(USArrests)  # inappropriate
prcomp(USArrests, scale = TRUE)
prcomp(~ Murder + Assault + Rape, 
       data = USArrests, scale = TRUE)
plot(prcomp(USArrests))
summary(prcomp(USArrests, scale = TRUE))
biplot(prcomp(USArrests, scale = TRUE))


### Example 10
### PCR

longley
?longley

# A macroeconomic data set which provides a well-known example 
# for a highly collinear regression.

longley.x <- data.matrix(longley[, 1:6])
longley.y <- longley[, "Employed"]
pairs(longley, main = "longley data")
summary(fm1 <- lm(Employed ~ ., data = longley))

sfm1<-step(fm1)
summary(sfm1)

prcomp(~ .-Employed, data = longley)
summary(prcomp(~ .-Employed, data = longley))

prcomp(~longley.x)
summary(prcomp(~longley.x))

prcomp(~ .-Employed, data = longley)$rotation
prcomp(~ .-Employed, data = longley)$x

summary(lm(Employed~prcomp(~ .-Employed, data=longley)$x[,1:2], 
           data = longley))

# Make it less messy
longley.pcx<-prcomp(~ .-Employed, data = longley)$x
summary(lm(Employed~longley.pcx[,1:2], data = longley))