-
Notifications
You must be signed in to change notification settings - Fork 3
/
helpgetstarted02.sas
157 lines (126 loc) · 3.3 KB
/
helpgetstarted02.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/* ===========================================
   Check to make sure you have run helpformats01.sas
   first to create and apply the formats
   and to create the library "library".
   If you have already got the formats.sas7bcat
   created, you can run the LIBNAME statement
   again if needed, otherwise comment this code out
   or skip it.

   NOTE: block comments are used here on purpose.
   The asterisk-style comment statement must not
   contain unmatched quotation marks (apostrophes),
   so contractions are unsafe in that comment form.
   =========================================== */

/* ===========================================
   CHANGE the location of the DIRECTORY below
   to the location where your files are.
   This CREATES a link to your files called "library".
   =========================================== */
libname library 'C:\MyGithub\N736Fall2017_HELPdataset\' ;

/* ===========================================
   In general it is a good idea to make a
   copy of the original data - here a copy
   is put into the WORK library.
   The rest of the code is then run in the WORK
   library (which is temporary) so the original
   file is left untouched.
   =========================================== */

/* make a copy to WORK */
data help;
   set library.helpmkh;
run;
/* The formatting is now applied and should work.
   Verify by running PROC FREQ or PROC CONTENTS. */
PROC FREQ DATA=help;
   TABLES female;
RUN;

PROC FREQ DATA=help;
   TABLES f1a;
RUN;

/* List the dataset contents, including the
   formatting and labelling details. */
PROC CONTENTS DATA=help;
RUN;
/* ===========================================
   UNIVARIATE statistics.
   PROC UNIVARIATE and PROC MEANS are both helpful.
   =========================================== */

/* Univariate statistics for age, with plots. */
PROC UNIVARIATE DATA=help PLOTS;
   VAR age;
RUN;

/* OPTIONAL:
   the percentile algorithm can be changed.
   The default is PCTLDEF=5; the available
   choices are 1, 2, 3, 4 or 5 - see the SAS
   help for more details. */
PROC UNIVARIATE DATA=help PCTLDEF=1 PLOTS;
   VAR age;
RUN;

/* Same request using algorithm 3. */
PROC UNIVARIATE DATA=help PCTLDEF=3 PLOTS;
   VAR age;
RUN;

/* Univariate statistics plus a histogram
   with a normal curve overlaid. */
PROC UNIVARIATE DATA=help PCTLDEF=1 PLOTS;
   VAR age;
   HISTOGRAM age / NORMAL;
RUN;

/* Other probability plots: P-P, probability and Q-Q. */
PROC UNIVARIATE DATA=help PCTLDEF=1 PLOTS;
   VAR age;
   PPPLOT age;
   PROBPLOT age;
   QQPLOT age;
RUN;

/* Univariate statistics with a histogram,
   a Q-Q plot and the normality tests. */
PROC UNIVARIATE DATA=help NORMALTEST PLOTS;
   VAR age;
   HISTOGRAM age / NORMAL;
   QQPLOT age;
RUN;
/* TRY WITH OTHER VARIABLES */

/* Some plots - boxplot.
   PROC BOXPLOT needs a grouping variable, so to
   draw a boxplot of a single variable a constant
   dummy variable (x=1) is added; plotting against
   it yields exactly one box. */
DATA help2;
   SET help;
   x = 1;
RUN;

PROC BOXPLOT DATA=help2;
   PLOT age * x;
RUN;

/* Boxplot of age by racegrp, drawn with
   PROC SGPLOT and its VBOX statement. */
proc sgplot data=help;
   vbox age / category=racegrp;
run;

/* The percentile method can be changed if wanted. */
proc sgplot data=help;
   vbox age / category=racegrp percentile=3;
run;
/* Other summaries: default PROC MEANS output for age. */
PROC MEANS DATA=help;
   VAR age;
RUN;

/* Age summarised within each race group. */
PROC MEANS DATA=help;
   CLASS racegrp;
   VAR age;
RUN;

/* Selected summary statistics for several variables. */
PROC MEANS DATA=help N MIN MAX MEAN STD MEDIAN Q1 Q3;
   VAR age cesd pcs mcs;
RUN;

/* The same summary statistics, split by gender (female). */
PROC MEANS DATA=help N MIN MAX MEAN STD MEDIAN Q1 Q3;
   CLASS female;
   VAR age cesd pcs mcs;
RUN;

/* Categorical data: frequency table and frequency plot. */
PROC FREQ DATA=help;
   TABLES racegrp / PLOTS=FREQPLOT;
RUN;