# Oct 1, 2024 ### Example 1 ### Similar to Sep 24, Examples 1 and 2, with more added x<-c(1,2,3,4,5) y<-c(1.9, 2.2, 2.9, 3.2, 5.2) x y X<-matrix(c(rep(1,5),x),ncol=2) X bhat<-solve(t(X)%*%X)%*%t(X)%*%y bhat beta0hat<-bhat[1,1] beta1hat<-bhat[2,1] yhat<-X%*%bhat yhat ehat<-y-yhat ehat rss<-t(ehat)%*%(ehat) rss t(y)%*%y - t(yhat)%*%yhat sigma2hat<-rss/(5-1-1) sigma2hat #sigma2hat*solve(t(X)%*%X) sigma2hat[1,1]*solve(t(X)%*%X) varhatbhat<-sigma2hat[1,1]*solve(t(X)%*%X) se0<-sqrt(varhatbhat[1,1]) se1<-sqrt(varhatbhat[2,2]) cov01<-varhatbhat[1,2] sxx<-sum((x-mean(x))^2) sxx -sigma2hat[1,1]*mean(x)/sxx cov01 t0<-beta0hat/se0 t1<-beta1hat/se1 t0 t1 pt(abs(t0),5-1-1,lower.tail=F)*2 pt(abs(t1),5-1-1,lower.tail=F)*2 syy<-sum((y-mean(y))^2) ssreg<-syy-rss r2<-1-rss/syy adjr2<-1-(rss/(5-1-1))/(syy/(5-1)) r2 adjr2 1-(1-r2)*(5-1)/(5-1-1) # lm summary(lm(y~x)) # residual se sqrt(sigma2hat) # F-stat (ssreg/1)/(rss/(5-1-1)) (ssreg/1)/sigma2hat fstat<-(ssreg/1)/sigma2hat fstat pf(fstat,1,5-2,lower.tail = F) # More on lm() lm(y~x) summary(lm(y~x)) ex1.lm<-lm(y~x) summary(ex1.lm) anova(ex1.lm) plot(x,y) abline(ex1.lm) plot(x,y, xlim=c(0,6), ylim=c(0,6)) abline(ex1.lm) abline(v=0) plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y") plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y") plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y", pch=16) plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y", pch="$") plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y") abline(ex1.lm, lty=3) plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y") abline(ex1.lm, lwd=2) plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y") abline(ex1.lm, col=2) plot(x,y, xlim=c(0,6), ylim=c(0,6), xlab="X", ylab="Y", main="X versus Y") abline(ex1.lm, col="red") ### Example 2 (ALR) ### See also ALR4 Primer library(alr4) fuel2001 ?fuel2001 f2001<-transform(fuel2001, Dlic=1000*Drivers/Pop, Fuel=1000*FuelC/Pop, Income=Income/1000, logMiles = log(Miles)) fuel<- f2001[ ,c(7, 8, 3, 10, 9)] summary(fuel) pairs(fuel) cor(fuel) summary(lm(Fuel~Tax+Dlic+Income+logMiles, data=fuel)) y<-fuel$Fuel x1<-fuel$Tax x2<-fuel$Dlic x3<-fuel$Income x4<-fuel$logMiles # log2(fuel2001$Miles) n<-length(y) p<-4 X<-matrix(c(rep(1,n),x1,x2,x3,x4),ncol=p+1) X bhat<-solve(t(X)%*%X)%*%t(X)%*%y bhat yhat<-X%*%bhat yhat ehat<-y-yhat ehat summary(ehat) rss<-t(ehat)%*%(ehat) rss t(y)%*%y - t(yhat)%*%yhat sigma2hat<-rss/(n-p-1) sigma2hat sqrt(sigma2hat) n-p-1 varhatbhat<-sigma2hat[1,1]*solve(t(X)%*%X) varhatbhat sebhat<-sqrt(diag(varhatbhat)) sebhat tstat<-bhat/sebhat tstat pt(abs(tstat),n-p-1,lower.tail=F)*2 syy<-sum((y-mean(y))^2) ssreg<-syy-rss r2<-1-rss/syy adjr2<-1-(rss/(n-p-1))/(syy/(n-1)) r2 adjr2 1-(1-r2)*(n-1)/(n-p-1) # F-stat fstat<-(ssreg/p)/sigma2hat fstat pf(fstat,p,n-p-1,lower.tail = F) # More fuel.lm<-lm(Fuel~Tax+Dlic+Income+logMiles, data=fuel) summary(fuel.lm) anova(fuel.lm) plot(fuel.lm) fuel.fit<-fitted(fuel.lm) fuel.resid<-resid(fuel.lm) fuel.fit yhat fuel.fit-yhat sum(fuel.fit-yhat) fuel.resid ehat fuel.resid-ehat sum(fuel.resid-ehat) plot(fuel.fit, fuel.resid) abline(h=0, lty=2) plot(fuel.fit, fuel.resid, xlab="Fitted Value", ylab="Residual", main="Residual Plot") abline(h=0, lty=2) # Prediction # 95% at Tax=20, Dlic=800, Income=30, logMiles=10 xstar<-c(1, 20, 800, 30, 10) ystar<-t(xstar)%*%bhat ystar sepred<-sqrt(sigma2hat*(1+t(xstar)%*%solve(t(X)%*%X)%*%xstar)) ystar-qt(0.025,n-p-1,lower.tail=F)*sepred ystar+qt(0.025,n-p-1,lower.tail=F)*sepred ### Example 3 setwd("~/Class/MATH5910/Fall2024/R") wateruse<-read.table("wateruse.txt", header=T) wateruse # VARIABLE DESCRIPTION # # Temperature Average monthly temperate (F) # Production Amount of production (M pounds) # Days Number of plant operating days in the month # Persons Number of persons on the monthly plant payroll # Water Monthly water usage (gallons) # # We are interested in the effect of the first 4 variables on Water. summary(wateruse) pairs(wateruse) cor(wateruse) summary(lm(Water ~ . , data=wateruse)) # Now, if we are only interested in the effects of # Temperature, Days, Persons on Water summary(wateruse[ ,c('Temperature', 'Days', 'Persons', 'Water')]) pairs(wateruse[ ,c('Temperature', 'Days', 'Persons', 'Water')]) cor(wateruse[ ,c('Temperature', 'Days', 'Persons', 'Water')]) summary(lm(Water ~ Temperature + Days + Persons, data=wateruse)) ### Example 4 ### Polynomial Regression physics plot(y~x, data=physics) physics1.lm<-lm(y~x, data=physics) summary(physics1.lm) plot(y~x, data=physics) abline(physics1.lm) fit.physics1<-fitted(physics1.lm) resid.physics1<-resid(physics1.lm) plot(fit.physics1,resid.physics1) abline(h=0, lty=2) summary(lm(y~x+x^2, data=physics)) physics2.lm<-lm(y~x+I(x^2), data=physics) summary(physics2.lm) fit.physics2<-fitted(physics2.lm) resid.physics2<-resid(physics2.lm) plot(fit.physics2,resid.physics2) abline(h=0, lty=2) plot(y~x, data=physics) abline(physics1.lm) abline(physics2.lm, col=2) plot(y~x, data=physics) abline(physics1.lm) lines(physics$x, fit.physics2, col=2) plot(y~x, data=physics) abline(physics1.lm) lines(physics$x, fit.physics2, lty=2) physics3.lm<-lm(y~x+I(x^2)+I(x^3), data=physics) summary(physics3.lm) fit.physics3<-fitted(physics3.lm) resid.physics3<-resid(physics3.lm) plot(fit.physics3,resid.physics3) abline(h=0, lty=2) plot(y~x, data=physics) abline(physics1.lm) lines(physics$x, fit.physics2, col=2) lines(physics$x, fit.physics3, col=4) plot(y~x, data=physics) abline(physics1.lm) lines(physics$x, fit.physics2, lty=2) lines(physics$x, fit.physics3, lty=3) # Matrix Computation y<-physics$y x<-physics$x n<-length(y) p<-2 X<-matrix(c(rep(1,n),x,x^2),ncol=3) X bhat<-solve(t(X)%*%X)%*%t(X)%*%y bhat summary(physics2.lm) ### Example 5 ### from textbook cakes ?cakes cakes.lm<-lm(Y~X1+X2+I(X1^2)+I(X2^2)+X1:X2, data=cakes) summary(cakes.lm) coef(cakes.lm) y<-cakes$Y x1<-cakes$X1 x2<-cakes$X2 X<-matrix(c(rep(1,nrow(cakes)),x1,x2,x1^2,x2^2,x1*x2),ncol=6) X bhat<-solve(t(X)%*%X)%*%t(X)%*%y X<-matrix(c(rep(10,nrow(cakes)),x1,x2,x1^2,x2^2,x1*x2),ncol=6) X bhat<-solve(t(X)%*%X)%*%t(X)%*%y bhat coef(cakes.lm)