/* pollen.sas */ options ls=72 ps=60; data pollen; infile 'pollen.dat'; input prop time; proc sort data=pollen out=sorted; by time; proc print data=sorted; proc plot data=pollen; plot prop*time; title 'plot of prop vs time (all data)'; /* regressions with x = time */ proc glm data=pollen; model prop = time / ss1 ss2 p clm clparm; output out=new1 predicted=yhat residual=resid lcl=clpl ucl=clpu lclm=clml uclm=clmu; proc print data=new1; var time prop yhat clml clmu clpl clpu; title 'regression of prop on time (all data)'; proc plot data=new1; plot resid*time / vref=0; proc univariate data=new1 plot normal; var resid; proc rank data=new1 normal=blom out=new1a; var resid; ranks nscore; proc plot data=new1a; plot nscore*resid; /* fit means model to test for lack of fit */ proc glm data=pollen; class time; model prop = time; title 'means model (all data)'; /* test for lack of fit using output from above */ data lof1; input dfwithin sswithin dfresid ssresid; sslof = ssresid - sswithin; dflof = dfresid - dfwithin; mslof = sslof / dflof; mswithin = sswithin / dfwithin; fcalc = mslof / mswithin; pvalue = 1 - probf(fcalc,dflof,dfwithin); cards; 15 0.36386667 33 0.90183662 ; proc print data=lof1; var mslof dflof mswithin dfwithin fcalc pvalue; title 'test for lack of fit (all data)'; /* exclude times over 28 */ data pollen2; set pollen; if time > 28 then delete; proc plot data=pollen2; plot prop*time; title 'plot of prop vs time (data with time <= 28)'; proc glm data=pollen2; model prop = time / ss1 ss2 p clm clparm; output out=new2 predicted=yhat residual=resid lcl=clpl ucl=clpu lclm=clml uclm=clmu; proc print data=new2; var time prop yhat clml clmu clpl clpu; title 'regression of prop on time (data with time <= 28)'; proc plot data=new2; plot resid*time / vref=0; proc univariate data=new2 plot normal; var resid; proc rank data=new2 normal=blom out=new2a; var resid; ranks nscore; proc plot data=new2a; plot nscore*resid; data sorted2; set sorted; if time > 28 then delete; /* fit means model to test for lack of fit */ proc glm data=pollen2; class time; model prop = time; title 'means model (data with time <= 28)'; /* test for lack of fit using output from above */ data lof2; input dfwithin sswithin dfresid ssresid; sslof = ssresid - sswithin; dflof = dfresid - dfwithin; mslof = sslof / dflof; mswithin = sswithin / dfwithin; fcalc = mslof / mswithin; pvalue = 1 - probf(fcalc,dflof,dfwithin); cards; 15 0.36386667 31 0.61419382 ; proc print data=lof2; var mslof dflof mswithin dfwithin fcalc pvalue; title 'test for lack of fit (data with time <= 28)'; run;