/* soymilk.sas */ options ls=72 ps=60; data soymilk; infile 'soymilk.dat'; input time protein; proc print data=soymilk; title 'soymilk data'; proc plot data=soymilk; plot protein*time; title 'soymilk data scatterplot'; proc glm data=soymilk; model protein = time / ss1 ss2 p; output out=new predicted=yhat residual=resid; title 'regression of protein on time'; proc plot data=new; plot resid*time / vref=0; title 'residual plot: regression of protein on time'; proc glm data=soymilk; class time; model protein = time; title 'means model'; /* test for lack of fit using output from above */ data lof; input sswithin dfwithin ssresid dfresid; sslof = ssresid - sswithin; dflof = dfresid - dfwithin; mslof = sslof / dflof; mswithin = sswithin / dfwithin; fcalc = mslof / mswithin; pvalue = 1 - probf(fcalc,dflof,dfwithin); cards; 0.29466667 8 0.77425463 10 ; proc print data=lof; var mslof dflof mswithin dfwithin fcalc pvalue; title 'test for lack of fit for simple linear regression'; /* test for lack of fit using polynomial trick */ data soymilk2; infile 'soymilk.dat'; input time protein; t2 = time * time; t3 = t2 * time; proc print data=soymilk2; title 'soymilk data with time squared and time cubed'; proc glm data=soymilk2; model protein = time t2 t3 / ss1 ss2 p; contrast 'lack of fit' t2 1, t3 1 / e; title 'test for lack of fit via cubic regression of protein on time'; /* add selected time values (with missing protein values) to allow plotting of the fitted curve and fit quadratic regression */ data addtime; input time protein; t2 = time * time; t3 = time * t2; cards; 0 . .1 . .2 . .3 . .4 . .5 . .6 . .7 . .8 . .9 . 1 . ; data soymilk3; set soymilk2 addtime; proc glm data=soymilk3; model protein = time t2 / ss1 ss2 p; output out=new2 predicted=yhat2 residual=resid2 lcl=clpl ucl=clpu lclm=clml uclm=clmu; title 'quadratic regression of protein on time'; proc print data=new2; var protein time clml clmu clpl clpu yhat2 resid2; proc plot data=new2; plot resid2*time / vref=0; title 'residual plot: quadratic model'; /*create data set for plotting curve with data */ data new3; set new2; code = '*'; if protein = . then code = '+'; newy = protein; if protein = . then newy = yhat2; proc print; proc plot data=new3; plot newy*time = code; title 'plot of data and fitted quadratic model'; run;