- Combining SAS data sets: Stacking
data dataname;
set data1 data2;
Useful for combining data with all or most of the same variables but different observations
data southentrance;
input entrance $ passnumber partysize age;
datalines;
S 43 3 27
S 44 3 24
S 45 3 2
;
data northentrance;
input entrance $ passnumber partysize age lot;
datalines;
N 21 5 41 1
N 87 4 33 3
N 65 2 67 1
N 66 2 7 1
;
data both;
set southentrance northentrance;
if age < 3then amount = 0;
elseif age > 65then amount = 27;
else amount = 35;
procprint;
run;
interleaving:stack data by order of a variable
procsortdata=southentrance;
by passnumber;
procsortdata=northentrance;
by passnumber;
data both_ordered;
set southentrance northentrance;
by passnumber;
procprint;
run;
- Combining SAS data sets: Merging
Data newdata;
merge data1 data2;
by variable;
Useful when you want to match observations from data1 with those from data2.
Data1 and data2 have to have at least one common variable.
Note variables with the same names besides those in the BY variables will be merged into one,with the second dataset overwriting the first one.
data shoes;
input style $ 1-15 ExcerciseType $ Sales price;
datalines;
Max Flight running 1930 142.99
Zip fit leather walking 2250 83.99
zoom airborne running 4150 112.99
Light step walking 1130 73.99
Max step woven walking 2230 75.99
zip sneak c-train 1190 92.99
;
data discount;
input ExcerciseType $ discount;
datalines;
c-train .25
running .30
walking .20
;
procsortdata=shoes;
by ExcerciseType;
procsortdata=discount;
by ExcerciseType;
data prices;
merge shoes discount;
by ExcerciseType;
newprice=price-price*discount;
procprint;run;
Merge a dataset with a summary dataset
procmeansdata=shoes sum;
class ExcerciseType;
var sales;
outputout=sums sum=Sales_sum;
run;
data sums1;
set sums;
if _TYPE_= 1;
procsortdata=sums1;
by ExcerciseType;
data shoes1;
merge shoes sums1(drop=_TYPE_ _FREQ_);
by ExcerciseType;
percentage=sales/Sales_sum;
procprintdata=shoes1;
by ExcerciseType;
ID ExcerciseType;
run;
Merge a single observation with many, without a common variable
data sums2;
set sums;
if _TYPE_=0;
data shoes2;
if _N_=1thenset sums2;
set shoes;
percentage=sales/Sales_sum;
procprintdata=shoes2; run;
Merge on multiple variables
data worker;
input ID year work $ :10. WBC;
datalines;
1 1940 Mixer 6000
2 1940 Spreader 8000
3 1940 Mixer 9000
1 1941 Mixer 6500
2 1941 Mixer 8500
3 1941 Spreader 8900
;
data exp;
input year work $:10. exposure;
datalines;
1940 Mixer 190
1940 Spreader 200
1941 Mixer 140
1941 Spreader 150
1942 Mixer 90
1942 spreader 100
1943 Mixer 70
1943 Spreader 80
;
Procsortdata=worker;
by year work;
procsortdata=exp;
by year work;
data combine;
merge worker exp;
by year work;
run;
procprintdata=combine;run;
- Combining SAS data sets: Updating a master data set with an update data set
data master-data-set;
update master-data-set update-date-set;
by variable-list;
data patients;
infile datalines dlm=",";
length lastname $10 address $30;
input account lastname $ address $ birthdate mmddyy10. gender $ insurance $ lastupdate mmddyy10.;
datalines;
620135, Smith, 234 Aspen st.,12-21-1975, m, CBC,02-16-1998
645722, Miyamoto, 65 3rd Ave.,04-03-1936, f, MCR,05-30-1999
645739, Jensvold, 505 Glendale ave.,06-15-1960, f, HLT,09-23-1993
874329, Kazoyan, 76-C la vista, ., ., MCD,01-15-2003
;
data transactions;
infile datalines dlm=",";
length lastname $10 address $30;
input account lastname $ address $ birthdate mmddyy10. gender $ insurance $ lastupdate mmddyy10.;
datalines;
620135, ., ., ., ., HLT,06-15-2003
874329, ., .,04-24-1954, m, .,06-15-2003
235777, Harman, 5656 Land Way,01-18-2000, f, MCD,06-15-2003
;
procsortdata=transactions;
by account;
procsortdata=patients;
by account;
data patients;
update patients transactions;
by account;
run;
procprintdata=patients;run;
Data set options
dataname (drop=variables, keep=variables, rename=(oldname=newname), in=varname)
data shoes;
input style $ 1-15 ExcerciseType $ :10. Sales price;
datalines;
Max Flight running 1930 142.99
Zip fit leather walking 2250 83.99
zoom airborne running 4150 112.99
Light step walking 1130 73.99
Max step woven walking 2230 75.99
zip sneak c-train 1190 92.99
Air basketball 1000 150
;
data discount;
input ExcerciseType $ discount;
datalines;
c-train .25
running .30
walking .20
soccer .30
;
procsortdata=shoes;
by ExcerciseType;
procsortdata=discount;
by ExcerciseType;
data prices;
merge shoes(in=index1) discount(in=index2);
by ExcerciseType;
if index1 or index2;
newprice=price-price*discount;
ind1=index1;
ind2=index2;
procprint;run;