* Turn on the NOTES system option so that SAS writes notes to the log *;
options notes;
* This is an example of SAS code that may be used to calculate Medians using Linear Interpolation *;
* It is provided as is. Any modifications are the responsibility of the data user. *;
* Basic concept of Linear Interpolation: *;
* 1. Place continuous data into categories *;
* 2. Find the category containing the simple median value *;
* 3. Calculate the median with linear interpolation *;
* This example is for median earnings for males who are employed full-time, year round *;
* It uses the ACS Public Use Microdata Sample (PUMS). *;
* Location of PUMS data: *;
* Step 1: Change the libname statement to point to the location of your data *;
* Replace dataset_name with the name of the dataset (e.g. pums_pus for PUMS Person data for US) *;
* Change if statement so that it is the relevant universe for your estimate *;
libname libref1 "<location of data>";
data recode_data;
    * <-- Replace dataset_name with the name of your PUMS dataset *;
    set libref1.<dataset_name> (keep = st pernp agep sex wkhp wkw pwgtp adjinc);

    * Inflation-adjust person earnings: PERNP is multiplied by ADJINC / 1,000,000 *;
    if pernp ne . then apern = pernp * (adjinc / 1000000);

    * Universe for this example: males older than 15 with nonzero, nonmissing adjusted earnings, *;
    * WKW equal to "1" and at least 35 usual hours worked (full-time, year round) *;
    * Every record outside that universe is dropped *;
    if not (agep > 15
            and apern not in (., 0)
            and sex = "1"
            and wkw = "1"
            and wkhp ne .
            and wkhp >= 35) then delete;

    * Step 2: recode adjusted earnings into 101 categories that are $2,500 wide *;
    * Category 1 is everything below $2,500 and category 101 is $250,000 or more *;
    if apern ne . then do;
        select;
            when (apern < 2500)   apern_rc = 1;
            when (apern < 250000) apern_rc = floor(apern / 2500) + 1;
            otherwise             apern_rc = 101;
        end;
    end;
run;
* Output data to spot check recodes are correct. *;
* List the first 4 records with a nonmissing APERN so the recode can be checked by eye *;
proc print data = recode_data (obs = 4 where = (apern ne .));
    title3 "Output first 4 observations where APERN not blank";
    title4 "Check that PERNP and APERN_RC are properly recoded";
    title5 "APERN = PERNP * (ADJINC / 1,000,000)";
    title6 "APERN_RC should have 101 categories";
    title7 "Categories: 1 for below $2,500, 2 for $2,500 to $4,999, etc.";
run;
* Step 3. Calculate weighted distribution to find the median and sort data *;
* The SAS option COMPLETETYPES calculates all combinations of the class variables. *;
* This is useful when calculating multiple medians at once (not covered in this example). *;
* Sum the person weights within each earnings category *;
* The output also carries an overall-total row whose APERN_RC is missing *;
proc means data = recode_data noprint completetypes sum;
    class apern_rc;
    var pwgtp;
    output out = distribution_for_median
           sum(pwgtp) = freq;
run;
* Sort to put total at the beginning (used in next step) *;
* Order by category in place. The missing APERN_RC of the total row sorts ahead of categories 1 to 101 *;
proc sort data = distribution_for_median
          out  = distribution_for_median;
    by apern_rc;
run;
* Step4: Calculate the median using linear interpolation *;
* NOTE: When using ACS data, the margin of error of the median should also be calculated. See the PUMS technical documentation for instructions. *;
data calculate_median ;
    * Reads the sorted weighted distribution and writes one row holding the interpolated median *;
    * Input: distribution_for_median, sorted by APERN_RC, with the overall-total row first *;
    * Output: a single observation with FLAG and MEDIAN for the category containing the simple median *;
    length flag $20;
    set distribution_for_median;
    by apern_rc;
    retain total median;

    * The row with a missing APERN_RC is the overall total. Keep it and reset the running sum *;
    if apern_rc = . then do;
        total = freq;
        cumulative_freq = 0;
    end;

    if apern_rc ne . then do;
        if freq = . then freq = 0;
        pct = 100 * freq / total;
        * Sum statement: CUMULATIVE_FREQ is retained across rows automatically *;
        cumulative_freq + freq;
        cumulative_pct = 100 * cumulative_freq / total;

        * The simple median lies in the first category whose cumulative share reaches 50 percent *;
        if 50 <= cumulative_pct and (cumulative_pct - pct) < 50 then do;
            * Categories 1 and 101 are open-ended, so interpolation is valid only in categories 2 to 100 *;
            * The earlier test on (apern_rc - 1) wrongly excluded category 2 and included category 101 *;
            if 2 <= apern_rc <= 100 then do;
                * median = lower bound + ((total/2 - weight below category) / weight in category) X width *;
                * lower bound of category = (apern_rc - 1) X 2500, e.g. $5,000 for $5,000 to $7,499 *;
                * weight below category = cumulative_freq - freq *;
                * weight in category = freq, and the category width is $2,500 *;
                median = ((apern_rc - 1) * 2500)
                       + (((total / 2) - (cumulative_freq - freq)) / freq) * 2500;
                flag = "Median";
            end;
            else do;
                * Flag the result when the median falls in an open-ended category *;
                median = .;
                flag = "Median UNDEFINED";
            end;
            output;
        end;
    end;
run;
* Report the flag and the interpolated median, shown with thousands separators *;
proc print data = calculate_median;
    title3 "Median Value";
    var flag median;
    format median comma12.;
run;
* Additional Notes: *;
* PUMS documentation: *;
* PUMS Technical Documentation: *;
* 1 *;