# Simple linear regression of `city` on `displ` (= 10 * size) for the
# vehicles in the mileage data with size <= 2.5, followed by plots of the
# fitted line and of the residuals.
#
# NOTE ----
# The read.table() call below reads the data from the file "fegsub_data.txt"
# and assumes that this file is in the directory "C:/Rexamples". Obey this
# convention or modify `data_file`.

data_file <- "C:/Rexamples/fegsub_data.txt"

mileage <- read.table(data_file, header = TRUE)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) View(mileage)
summary(mileage)

mileage1 <- subset(mileage, size <= 2.5, select = c(city, size))
if (interactive()) View(mileage1)
summary(mileage1)

# The rescaled predictor is stored in the data frame so that lm() and
# predict() find it through `data =`; nothing is attach()ed to the search
# path. `displ` is also kept as a plain vector for the plotting calls.
mileage1$displ <- 10 * mileage1$size
displ <- mileage1$displ

regression1 <- lm(city ~ displ, data = mileage1)
summary(regression1)
anova(regression1)

# Use the extractor functions rather than attach(regression1), which would
# place an object called `residuals` in front of stats::residuals().
fitted_values <- fitted(regression1)
resid_values <- residuals(regression1)

output1 <- cbind(
  displ = displ,
  city = mileage1$city,
  fitted.values = fitted_values,
  residuals = resid_values
)
if (interactive()) View(output1)
summary(output1)

# type = "pl": first column drawn as points, second column as a line.
matplot(displ, cbind(mileage1$city, fitted_values),
        pch = "*", type = "pl", ylab = "city, fitted.values")
matplot(displ, cbind(resid_values, 0),
        pch = "*", type = "pl", ylab = "residuals")

# EXTRAS ----
# Here are some extra commands which may be of interest.

# predict() with interval = "prediction" and no `newdata` warns that the
# intervals refer to future responses; that warning is expected here.
regr1_clim <- predict(regression1, se.fit = TRUE, interval = "confidence")
regr1_plim <- predict(regression1, se.fit = TRUE, interval = "prediction")
regr1_estimates <- cbind(regr1_clim$fit, regr1_plim$fit[, 2:3])
colnames(regr1_estimates) <- c("fit", "lclm", "uclm", "lclp", "uclp")
# fitted values, conf limits for the mean and for a prediction
# lclm,uclm  confidence interval for mean response
# lclp,uclp  prediction interval for one observation
regr1_estimates

matplot(displ, cbind(mileage1$city, fitted_values),
        pch = "*", type = "pl", ylab = "city, fitted.values")
matplot(displ, cbind(resid_values, 0),
        pch = "*", type = "pl", ylab = "residuals")

# Line plots join the points in row order, so order the rows by displ first;
# otherwise the curved interval limits zigzag when the data are unsorted.
displ_order <- order(displ)
matplot(displ[displ_order], regr1_estimates[displ_order, , drop = FALSE],
        lty = c(1, 2, 2, 3, 3), type = "l",
        xlab = "displ", ylab = "regr1_estimates")
plot(regression1)

# END EXTRAS ----

# results / output ----
# The remainder of this file is copied and pasted from the RStudio command
# window. It records the original run of the main commands, which used
# attach() and header=T; the script above fits the same model, so only the
# echoed commands and the `Call:` line differ from what it prints.
#
# > mileage <- read.table("C:/Rexamples/fegsub_data.txt", header=T)
# > View(mileage)
# > summary(mileage)
#       city          highway          trans
#  Min.   :10.00   Min.   :16.00   Min.   :1.000
#  1st Qu.:19.00   1st Qu.:26.00   1st Qu.:1.000
#  Median :22.50   Median :30.00   Median :1.000
#  Mean   :22.39   Mean   :29.77   Mean   :1.232
#  3rd Qu.:25.00   3rd Qu.:33.25   3rd Qu.:1.000
#  Max.   :39.00   Max.   :46.00   Max.   :2.000
#       size           cyl
#  Min.   :1.00   Min.   : 3.000
#  1st Qu.:1.80   1st Qu.: 4.000
#  Median :2.30   Median : 4.000
#  Mean   :2.93   Mean   : 5.339
#  3rd Qu.:3.80   3rd Qu.: 6.000
#  Max.   :6.80   Max.   :12.000
# > mileage1 <- subset(mileage, size <= 2.5, select = c(city, size))
# > View(mileage1)
# > summary(mileage1)
#       city            size
#  Min.   :19.00   Min.   :1.000
#  1st Qu.:23.00   1st Qu.:1.800
#  Median :24.00   Median :2.000
#  Mean   :25.77   Mean   :1.946
#  3rd Qu.:28.00   3rd Qu.:2.200
#  Max.   :39.00   Max.   :2.500
# > attach(mileage1)
# > displ <- 10*size
# > regression1 <- lm(city ~ displ)
# > summary(regression1)
#
# Call:
# lm(formula = city ~ displ)
#
# Residuals:
#     Min      1Q  Median      3Q     Max
# -4.5229 -1.4119  0.2851  1.7701  2.6891
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  49.1590     1.8722   26.26  < 2e-16 ***
# displ        -1.2020     0.0946  -12.71 2.93e-14 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 2.026 on 33 degrees of freedom
# Multiple R-squared:  0.8303,    Adjusted R-squared:  0.8252
# F-statistic: 161.5 on 1 and 33 DF,  p-value: 2.933e-14
#
# > anova(regression1)
# Analysis of Variance Table
#
# Response: city
#           Df Sum Sq Mean Sq F value    Pr(>F)
# displ      1 662.72  662.72  161.45 2.933e-14 ***
# Residuals 33 135.45    4.10
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# > attach(regression1)
# > output1 <- cbind(displ,city,fitted.values,residuals)
# > View(output1)
# > summary(output1)
#      displ            city       fitted.values
#  Min.   :10.00   Min.   :19.00   Min.   :19.11
#  1st Qu.:18.00   1st Qu.:23.00   1st Qu.:22.71
#  Median :20.00   Median :24.00   Median :25.12
#  Mean   :19.46   Mean   :25.77   Mean   :25.77
#  3rd Qu.:22.00   3rd Qu.:28.00   3rd Qu.:27.52
#  Max.   :25.00   Max.   :39.00   Max.   :37.14
#    residuals
#  Min.   :-4.5229
#  1st Qu.:-1.4119
#  Median : 0.2851
#  Mean   : 0.0000
#  3rd Qu.: 1.7701
#  Max.   : 2.6891
# > matplot(displ, cbind(city,fitted.values), pch="*", type="pl")
# > matplot(displ, cbind(residuals,0), pch="*", type="pl")
# >