* Turn on the NOTES system option so that notes are written to the SAS log *;
options notes;

* This is an example of SAS code that may be used to calculate Medians using Linear Interpolation *;

* It is provided as is. Any modifications are the responsibility of the data user. *;

* Basic concept of Linear Interpolation: *;

* 1. Place continuous data into categories *;

* 2. Find the category containing the simple median value *;

* 3. Calculate the median with linear interpolation *;

* This example is for median earnings for males who are employed full-time, year round *;

* It uses the ACS Public Use Microdata Sample (PUMS). *;

* Location of PUMS data: *;

* Step 1: Change the libname statement to point to the location of your data *;

* Replace dataset_name with the name of the dataset (e.g. pums_pus for PUMS Person data for US) *;

* Change if statement so that it is the relevant universe for your estimate *;

* Point LIBREF1 at the folder that holds the PUMS dataset (replace the placeholder path) *;
libname libref1 "<location of data>";

* Build RECODE_DATA: inflation-adjusted earnings (APERN) and the earnings category (APERN_RC) *;
* for the universe of interest, in this example males who work full-time, year round *;
data recode_data;
    set libref1.<dataset_name> (keep = st pernp agep sex wkhp wkw pwgtp adjinc); * <-- Replace dataset_name *;

    * Using PUMS data, person earnings are adjusted for inflation with ADJINC / 1,000,000 *;
    if pernp ne . then apern = pernp * (adjinc / 1000000);

    * Subsetting IF: keep only records in the universe of the estimate *;
    * (older than 15, nonzero and nonmissing earnings, SEX = 1, WKW = 1, at least 35 usual hours) *;
    if agep > 15
       and apern not in (., 0)
       and sex = "1"
       and wkw = "1"
       and wkhp ne .
       and wkhp >= 35;

    * Step 2: Place data into 101 categories that increase in increments of $2,500 *;
    * Category 1 is everything below $2,500 and category 101 is $250,000 or more *;
    * The missing check is kept so a missing APERN is never assigned to category 1 *;
    if apern ne . then do;
        select;
            when (apern < 2500)   apern_rc = 1;
            when (apern < 250000) apern_rc = floor(apern / 2500) + 1;
            otherwise             apern_rc = 101;
        end;
    end;
run;

* Output data to spot check recodes are correct. *;

* Titles are global statements, so they are set before the PROC that uses them *;
title3 "Output first 4 observations where APERN not blank";
title4 "Check that PERNP and APERN_RC are properly recoded";
title5 "APERN = PERNP * (ADJINC / 1,000,000)";
title6 "APERN_RC should have 101 categories";
title7 "Categories: 1 for below $2,500, 2 for $2,500 to $4,999, etc.";

* List the first 4 observations with a nonmissing APERN so the recodes can be checked by eye *;
proc print data = recode_data (obs = 4 where = (apern ne .));
run;

* Step 3. Calculate weighted distribution to find the median and sort data *;

* The SAS option COMPLETETYPES calculates all combinations of the class variables. *;

* This is useful when calculating multiple medians at once (not covered in this example). *;

* Weighted count (sum of PWGTP) for each earnings category plus one overall total row *;
* The overall total is the output row where APERN_RC is missing *;
proc means data = recode_data noprint completetypes sum;
    var pwgtp;
    class apern_rc;
    output out = distribution_for_median sum(pwgtp) = freq;
run;

* Order by APERN_RC. Missing values sort first, which puts the total row at the beginning (used in next step) *;
proc sort data = distribution_for_median out = distribution_for_median;
    by apern_rc;
run;

* Step 4: Calculate the median using linear interpolation *;

* NOTE: When using ACS data, the margin of error of the median should also be calculated. See the PUMS technical documentation for instructions. *;

* Reads the sorted weighted distribution (total row first, then categories 1 to 101), *;
* finds the category that contains the simple median and outputs one row holding *;
* the linearly interpolated median together with a FLAG describing the result *;
data calculate_median;
    length flag $20;
    set distribution_for_median;
    by apern_rc;

    * TOTAL must survive across rows. CUMULATIVE_FREQ is retained by the sum statement below *;
    retain total median;

    * Identifies Total (the row with missing APERN_RC), sets the cumulative frequency to zero *;
    if apern_rc = . then do;
        total = freq;
        cumulative_freq = 0;
    end;

    if apern_rc ne . then do;
        * A category with no weighted count contributes zero *;
        if freq = . then freq = 0;

        pct = 100 * freq / total;
        cumulative_freq + freq;
        cumulative_pct = 100 * cumulative_freq / total;

        * Identifies category containing the simple median: the cumulative percent *;
        * reaches 50 in this category and was still below 50 before it *;
        if 50 <= cumulative_pct and (cumulative_pct - pct) < 50 then do;

            * Median undefined in first and 101st categories because they are open-ended *;
            * so interpolation is only valid for the closed categories 2 through 100 *;
            if 2 <= apern_rc <= 100 then do;
                median = ((apern_rc - 1) * 2500)
                       + (((total / 2) - (cumulative_freq - freq)) / freq) * 2500;

                * Explanation of Median calculation using Linear Interpolation *;
                * median = (lower bound of category) + ((total/2 - total before category containing median) / total in cat.) X width of category *;
                * lower bound of category = ((apern_rc-1) X 2500), e.g. $5,000 if simple median in category for $5,000 to $7,499 *;
                * total before category containing median = (cumulative_freq - freq) *;
                * width of category, e.g. $2,500 *;
                * total in category = freq *;
                flag = "Median";
            end;
            else do;
                * Set flag to indicate when Median is Undefined *;
                median = .;
                flag = "Median UNDEFINED";
            end;

            * Only the row for the median category is written to CALCULATE_MEDIAN *;
            output;
        end;
    end;
run;

* Report the result: the flag and the interpolated median, shown with thousands separators *;
title3 "Median Value";

proc print data = calculate_median;
    var flag median;
    format median comma12.;
run;

* Additional Notes: *;

* PUMS documentation: *;

* PUMS Technical Documentation: *;

* (page 1) *;