/* stat214example.sas */
/* This program summarizes the stat214 example */
/* ---------------------------------------------------------*/
/* ------ stat214 example ----- */
/* ---------------------------------------------------------*/

/* ---------------------------------------------------------*/
/* Read the class survey data: one record per student.       */
/*   line     record number                                  */
/*   section  class section (1 or 2 in the data below)       */
/*   class    classification, prefixed a_ .. d_ so that the  */
/*            levels sort in fresh/soph/junior/senior order  */
/*   sex, age, height, weight, siblings  as entered          */
/*   bmi      computed below from weight and height          */
/* The records are entered in section order, which the       */
/* BY SECTION steps further down rely on.                    */
/* ---------------------------------------------------------*/
data stat214;
    input line section class $ sex $ age height weight siblings;
    /* 703 is the usual BMI conversion factor for pounds and inches;
       presumably those are the units of weight and height here */
    bmi = 703*weight/(height*height);
    datalines;
1 1 d_senior male 21 69 170 1
2 1 c_junior male 25 71 165 3
3 1 c_junior female 25 62 160 2
4 1 a_fresh male 18 72 162 1
5 1 c_junior female 22 63 170 1
6 1 a_fresh female 18 64 110 2
7 1 a_fresh female 18 60 103 1
8 1 a_fresh female 18 68 135 3
9 1 b_soph female 19 62 105 5
10 1 a_fresh male 18 74 190 2
11 1 b_soph female 20 70 150 1
12 1 d_senior female 21 61 116 1
13 1 a_fresh female 18 65 150 3
14 1 a_fresh female 19 64 140 4
15 1 a_fresh male 18 68 130 2
16 1 a_fresh female 18 63 110 2
17 1 b_soph female 21 62 125 1
18 1 a_fresh female 18 63 115 2
19 1 a_fresh female 19 64 135 3
20 1 a_fresh female 18 69 155 1
21 1 b_soph female 20 65 110 2
22 1 b_soph female 19 68 140 1
23 1 a_fresh female 47 66 110 1
24 1 b_soph female 20 70 145 2
25 1 a_fresh female 20 61 140 5
26 1 a_fresh female 18 63 180 0
27 1 c_junior male 22 70 175 2
28 1 a_fresh female 18 63 120 1
29 1 d_senior female 22 68 170 2
30 1 a_fresh female 18 66 125 3
31 1 c_junior male 22 75 205 2
32 1 a_fresh female 18 67 110 1
33 1 d_senior male 22 68 135 1
34 1 d_senior female 22 64 185 2
35 1 a_fresh female 41 61 96 1
36 1 c_junior female 22 59 95 5
37 2 c_junior female 20 66 110 1
38 2 c_junior male 20 72 180 1
39 2 c_junior female 21 66 120 1
40 2 b_soph female 21 61 105 3
41 2 a_fresh female 18 68 134 7
42 2 a_fresh female 28 66 130 4
43 2 b_soph female 26 64 135 4
44 2 b_soph female 19 64 117 1
45 2 a_fresh female 20 66 140 4
46 2 c_junior female 20 64 130 1
47 2 d_senior female 48 66 140 3
48 2 c_junior female 22 67 115 2
49 2 b_soph female 19 66 170 2
50 2 a_fresh male 18 66 190 3
51 2 b_soph female 21 67 135 4
52 2 a_fresh female 20 68 140 2
53 2 b_soph female 19 62 115 2
54 2 b_soph female 20 60 110 2
55 2 a_fresh male 18 72 185 3
56 2 d_senior male 23 72 190 2
57 2 d_senior male 24 69 170 4
58 2 c_junior male 21 72 140 3
59 2 c_junior female 20 65 112 2
60 2 c_junior female 21 62 130 1
61 2 a_fresh female 18 64 120 1
62 2 b_soph female 25 66 145 2
63 2 c_junior male 19 65 156 6
64 2 a_fresh female 18 67 125 0
65 2 c_junior female 44 66 165 4
66 2 b_soph male 19 71 155 3
67 2 b_soph female 19 62 133 2
;
run;

/* ---------------------------------------------------------*/
/* stat214 sex distribution */
/* ---------------------------------------------------------*/

/* get tabular summaries */
proc freq data=stat214;
    tables sex;
    title 'stat214 sex distribution';
run;
title;

/* get graphical summaries */
proc sgplot data=stat214;
    title 'stat214: sex distribution';
    hbar sex / stat=percent datalabel;
    /* stat=percent: horizontal bar graph in terms of percentages
                     (use stat=freq for frequencies)
       datalabel:    displays percentages at end of bars */
run;
title;

/* ---------------------------------------------------------*/
/* stat214 sex distribution by section */
/* ---------------------------------------------------------*/

/* get tabular summaries
   (BY SECTION needs the data in section order; the records
   above are entered that way) */
proc freq data=stat214;
    tables sex;
    by section;
    title 'stat214 sex distribution by section';
run;
title;

/* get graphical summaries */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: sex distribution by section';
    panelby section / columns=1;
    hbar sex / stat=percent datalabel;
    /* horizontal bar graphs for each section
       columns=1:    both in the same column
                     (use columns=2 for side by side)
       stat=percent: use percentages (stat=freq for counts)
       WARNING: must have pctlevel=cell (in proc statement) to get
       correct percentages, i.e., 100% for each section */
run;
title;

proc sgplot data=stat214 pctlevel=group;
    title 'stat214: sex distribution by section (segmented bar graphs)';
    hbar section / stat=percent group=sex;
    /* horizontal segmented bar graphs, one for each section
       stat=percent: use percentages
       WARNING: must have pctlevel=group (in proc statement) to get
       correct percentages, i.e., 100% for each section */
run;
title;

/* ---------------------------------------------------------*/
/* ---------------------------------------------------------*/
/* stat214 classification distribution */
/* ---------------------------------------------------------*/

/* get tabular summaries */
proc freq data=stat214;
    tables class;
    title 'stat214 classification distribution';
run;
title;

/* get graphical summaries */
proc sgplot data=stat214;
    title 'stat214: classification distribution';
    hbar class / stat=percent datalabel;
    /* stat=percent: horizontal bar graph in terms of percentages
                     (use stat=freq for frequencies)
       datalabel:    displays percentages at end of bars */
run;
title;

/* ---------------------------------------------------------*/
/* stat214 classification distribution by section */
/* ---------------------------------------------------------*/

/* get tabular summaries */
proc freq data=stat214;
    tables class;
    by section;
    title 'stat214 classification distribution by section';
run;
title;

/* get graphical summaries */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: classification distribution by section';
    panelby section / columns=1;
    hbar class / stat=percent datalabel;
    /* horizontal bar graphs for each section
       columns=1:    both in the same column
                     (use columns=2 for side by side)
       stat=percent: use percentages (stat=freq for counts)
       WARNING: must have pctlevel=cell (in proc statement) to get
       correct percentages, i.e., 100% for each section */
run;
title;

proc sgplot data=stat214 pctlevel=group;
    title 'stat214: classification distribution by section (segmented bar graphs)';
    hbar section / stat=percent group=class;
    /* horizontal segmented bar graphs, one for each section
       stat=percent: use percentages
       WARNING: must have pctlevel=group (in proc statement) to get
       correct percentages, i.e., 100% for each section */
run;
title;

/* ---------------------------------------------------------*/
/* ---------------------------------------------------------*/
/* stat214 number of siblings distribution */
/* ---------------------------------------------------------*/

/* numerical and graphical summaries */
proc freq data=stat214;
    tables siblings;
    title 'stat214 siblings distribution';
run;

proc univariate data=stat214 nextrval=5;
    var siblings;
    /* keep only the extreme values, basic measures and quantiles tables */
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 siblings distribution';
run;

/* five-number summary plus range, IQR, mean and std */
proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    var siblings;
run;

/* selected percentiles */
proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var siblings;
run;
title;

/* additional graphical summaries
   histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    title 'stat214: number of siblings distribution';
    histogram siblings / datalabel=percent;
    density siblings / type=kernel;
run;
title;
/* title assigns a title for the output
   --histogram options--
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* ---------------------------------------------------------*/
/* stat214 number of siblings distribution by section */
/* ---------------------------------------------------------*/

/* numerical and graphical summaries */
proc freq data=stat214;
    tables siblings;
    by section;
    title 'stat214 siblings distribution by section';
run;

/* The title is repeated with "by section" so the UNIVARIATE and
   MEANS output that follows is not labeled as the overall
   distribution. */
proc univariate data=stat214 nextrval=5;
    class section;
    var siblings;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 siblings distribution by section';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    class section;
    var siblings;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class section;
    var siblings;
run;
title;

/* additional graphical summaries
   histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: number of siblings distribution by section';
    panelby section;
    histogram siblings / datalabel=percent;
    density siblings / type=kernel;
run;
title;
/* title assigns a title for the output
   panelby section; requests histograms for each section
   --histogram options--
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* overlaid histograms and kernel densities, one per section */
proc sgplot data=stat214 pctlevel=group;
    histogram siblings / transparency=0.4 group=section;
    density siblings / type=kernel group=section;
    title 'stat214: siblings dist by section';
run;

/* box plots, one per section */
proc sgplot data=stat214;
    hbox siblings / group=section;
    title 'stat214: siblings dist box plots by section';
run;
title;

/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* stat214 age distribution */
/* ---------------------------------------------------------*/

/* numerical and graphical summaries */
proc freq data=stat214;
    tables age;
    title 'stat214 age distribution';
run;

proc univariate data=stat214 nextrval=5;
    var age;
    /* keep only the extreme values, basic measures and quantiles tables */
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 age distribution';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    var age;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var age;
run;
title;

/* additional graphical summaries
   histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    title 'stat214: age distribution';
    histogram age / binstart=17 binwidth=2 datalabel=percent;
    density age / type=kernel;
run;
title;
/* title assigns a title for the output
   --histogram options--
   binstart=17 binwidth=2: first bin midpoint and bin width
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* ---------------------------------------------------------*/
/* stat214 age distribution by section */
/* ---------------------------------------------------------*/

/* numerical and graphical summaries */
proc freq data=stat214;
    tables age;
    by section;
    title 'stat214 age distribution by section';
run;

/* The title is repeated with "by section" so the UNIVARIATE and
   MEANS output that follows is not labeled as the overall
   distribution. */
proc univariate data=stat214 nextrval=5;
    class section;
    var age;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 age distribution by section';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    class section;
    var age;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class section;
    var age;
run;
title;

/* additional graphical summaries
   histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: age distribution by section';
    panelby section;
    histogram age / binstart=17 binwidth=2 datalabel=percent;
    density age / type=kernel;
run;
title;
/* title assigns a title for the output
   panelby section; requests histograms for each section
   --histogram options--
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* overlaid histograms and kernel densities, one per section */
proc sgplot data=stat214 pctlevel=group;
    histogram age / binstart=17 binwidth=2 transparency=0.4 group=section;
    density age / type=kernel group=section;
    title 'stat214: age dist by section';
run;

/* box plots, one per section */
proc sgplot data=stat214;
    hbox age / group=section;
    title 'stat214: age dist box plots by section';
run;
title;

/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* continuous variables */
/* -------------------------------------------------------- */
/* -------------------------------------------------------- */

/* ---------------------------------------------------------*/
/* stat214 weight distribution */
/* ---------------------------------------------------------*/
proc univariate data=stat214 nextrval=5;
    var weight;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 weight distribution';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    var weight;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var weight;
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* weight distribution -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    title 'stat214: weight distribution';
    histogram weight / binwidth=10 binstart=95 datalabel=percent;
    density weight / type=kernel;
run;
title;
/* title assigns a title for the output
   --histogram options--
   these options are chosen to make the histogram correspond
   to a stem and leaf histogram with stem=10's
   binstart: first bin midpoint
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* -------------------------------------------------------- */
/* weight distribution -- box plot */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    hbox weight;
    title 'stat214: weight dist box plot';
run;
title;

/* ---------------------------------------------------------*/
/* stat214 weight distribution by sex */
/* ---------------------------------------------------------*/

/* The title says "by sex" so the grouped UNIVARIATE and MEANS
   output is not labeled as the overall distribution. */
proc univariate data=stat214 nextrval=5;
    class sex;
    var weight;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 weight distribution by sex';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    class sex;
    var weight;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class sex;
    var weight;
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* weight distribution by sex -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: weight distribution by sex';
    panelby sex;
    histogram weight / binwidth=10 binstart=95 datalabel=percent;
    density weight / type=kernel;
run;
title;
/* title assigns a title for the output
   --
   panelby sex; requests a graph for each level of sex
   --histogram options--
   these options are chosen to make the histogram correspond
   to a stem and leaf histogram with stem=10's
   binstart: first bin midpoint
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a fitted smooth density */

/* -------------------------------------------------------- */
/* weight distribution by sex -- histogram, smoothed histogram,
   and fitted normal distribution */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: weight distribution by sex';
    panelby sex;
    histogram weight / binwidth=10 binstart=95;
    density weight / type=normal;
    density weight / type=kernel;
    /* type=normal requests a fitted normal density
       type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- overlaid histograms and box plots */
/* -------------------------------------------------------- */
proc sgplot data=stat214 pctlevel=group;
    histogram weight / binwidth=10 binstart=95 transparency=0.4 group=sex;
    density weight / type=kernel group=sex;
    title 'stat214: weight dist by sex';
run;

proc sgplot data=stat214;
    hbox weight / group=sex;
    title 'stat214: weight dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* stat214 height distribution */
/* ---------------------------------------------------------*/
proc univariate data=stat214 nextrval=5;
    var height;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 height distribution';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    var height;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var height;
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* height distribution -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    title 'stat214: height distribution';
    histogram height / binwidth=1 datalabel=percent;
    density height / type=kernel;
run;
title;
/* title assigns a title for the output
   --histogram options--
   binwidth=1: each bin is one unit of height wide
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* -------------------------------------------------------- */
/* height distribution -- box plot */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    hbox height;
    title 'stat214: height dist box plot';
run;
title;

/* ---------------------------------------------------------*/
/* stat214 height distribution by sex */
/* ---------------------------------------------------------*/
proc univariate data=stat214 nextrval=5;
    class sex;
    var height;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 height distribution by sex';
run;

/* the title set above stays in effect for the PROC MEANS steps
   that follow, until the next title; statement */
proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    class sex;
    var height;
run;
/* selected percentiles of height for each sex */
proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class sex;
    var height;
    /* class sex; indicates that the data are to be divided into
       classes corresponding to the levels of the classification
       variable -- sex in this example
       NOTE: the data do not need to be sorted with the class statement */
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* height distribution by sex -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: height distribution by sex -- binwidth=2';
    panelby sex;
    histogram height / binwidth=2 datalabel=percent;
    density height / type=kernel;
run;
title;
/* title assigns a title for the output
   --
   panelby sex; requests a graph for each level of sex
   --histogram options--
   binwidth=2: each bin is two units of height wide
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a fitted smooth density */

/* -------------------------------------------------------- */
/* height distribution by sex -- histogram, smoothed histogram,
   and fitted normal distribution */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: height distribution by sex -- binwidth=1';
    panelby sex;
    histogram height / binwidth=1;
    density height / type=normal;
    density height / type=kernel;
    /* type=normal requests a fitted normal density
       type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    hbox height / group=sex;
    title 'stat214: height dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* Now we create the histograms with kernel density curves
   for both sexes overlaid on the same graph */
/* -------------------------------------------------------- */
/* height distribution by sex */
/* -------------------------------------------------------- */
proc sgplot data=stat214 pctlevel=group;
    histogram height / binwidth=1 transparency=0.4 group=sex;
    density height / type=kernel group=sex;
    title 'stat214: height dist by sex -- binwidth=1';
run;
title;

/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* stat214 bmi distribution */
/* ---------------------------------------------------------*/
proc univariate data=stat214 nextrval=5;
    var bmi;
    /* keep only the extreme values, basic measures and quantiles tables */
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 bmi distribution';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    var bmi;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var bmi;
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* bmi distribution -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    title 'stat214: bmi distribution -- binwidth=1';
    histogram bmi / binwidth=1 datalabel=percent;
    density bmi / type=kernel;
run;
title;
/* title assigns a title for the output
   --histogram options--
   binwidth=1: each bin is one bmi unit wide
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a smooth fit to the data
   --
   run; and title; resets the title */

/* -------------------------------------------------------- */
/* bmi distribution -- box plot */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    hbox bmi;
    title 'stat214: bmi dist box plot';
run;
title;

/* ---------------------------------------------------------*/
/* stat214 bmi distribution by sex */
/* ---------------------------------------------------------*/
proc univariate data=stat214 nextrval=5;
    class sex;
    var bmi;
    ods select ExtremeValues BasicMeasures Quantiles;
    title 'stat214 bmi distribution by sex';
run;

proc means data=stat214 maxdec=4 n min q1 median q3 max range qrange mean std;
    class sex;
    var bmi;
run;

proc means data=stat214 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class sex;
    var bmi;
    /* class sex; indicates that the data are to be divided into
       classes corresponding to the levels of the classification
       variable -- sex in this example
       NOTE: the data do not need to be sorted with the class statement */
run;
title;

/* ---------------------------------------------------------*/
/* additional graphics */
/* bmi distribution by sex -- histogram and smoothed histogram */
/* -------------------------------------------------------- */
/* pctlevel=cell added to match the other SGPANEL steps in this
   program; NOTE(review): confirm it affects histogram percent labels */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: bmi distribution by sex -- binwidth=2';
    panelby sex;
    histogram bmi / binwidth=2 datalabel=percent;
    density bmi / type=kernel;
run;
title;
/* title assigns a title for the output
   --
   panelby sex; requests a graph for each level of sex
   --histogram options--
   binwidth=2: each bin is two bmi units wide
   datalabel=percent: display percentage at end of bin
                      (use datalabel=count to get frequency)
   --density options--
   type=kernel requests a fitted smooth density */

/* -------------------------------------------------------- */
/* bmi distribution by sex -- histogram, smoothed histogram,
   and fitted normal distribution */
/* -------------------------------------------------------- */
proc sgpanel data=stat214 pctlevel=cell;
    title 'stat214: bmi distribution by sex -- binwidth=1';
    panelby sex;
    histogram bmi / binwidth=1;
    density bmi / type=normal;
    density bmi / type=kernel;
    /* type=normal requests a fitted normal density
       type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* bmi distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=stat214;
    hbox bmi / group=sex;
    title 'stat214: bmi dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* Now we create the histograms with kernel density curves
   for both sexes overlaid on the same graph */
/* -------------------------------------------------------- */
/* bmi distribution by sex */
/* -------------------------------------------------------- */
proc sgplot data=stat214 pctlevel=group;
    histogram bmi / binwidth=1 transparency=0.4 group=sex;
    density bmi / type=kernel group=sex;
    title 'stat214: bmi dist by sex -- binwidth=1';
run;
title;

/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* -------------------------------------------------------- */
/* ------ end of stat214 example -------------------------- */
/* -------------------------------------------------------- */