*this code can be used to replicate the results presented in Table 4 of; *"It could be overreaction, not lottery-seeking, that is behind Bali, Cakici and Whitelaw's MAX effect"; *by J. Gorman, F. Akhtar, R.B. Durand and J. Gould, CFR 2022; *5 data sets are required to execute this code; *1) Factors_monthly.CSV, 2) Factors_daily.CSV; *3) CRSP_monthly_sample.sas7bdat, 4) CRSP_daily_sample.sas7bdat; *5) Compustat_BV_sample.sas7bdat; *truncated sample data sets are provided with this code; *the code references the data sets from "E:\"; *import data *dataset containing Fama French Carhart 4 factors; proc import out=work.a_ffc4_monthly datafile= 'E:\Factors_monthly.CSV' dbms=csv replace; run; *dataset containing Fama French Carhart 4 factors; proc import out=work.a_ffc4_daily datafile= 'E:\Factors_daily.CSV' dbms=csv replace; run; *reformat date variable to SAS date; data a_ffc4_monthly; retain dateff; set a_ffc4_monthly (rename=(dateff=datein)); year=int(datein/100); month=int((datein-year*100)); day=1; dateff=mdy(month,day,year); format dateff mmddyy10.; drop year month day datein; run; data a_ffc4_daily; retain date; set a_ffc4_daily (rename=(date=datein)); year=int(datein/10000); month=int((datein/100-year*100)); day=int((datein-year*10000-month*100)); date=mdy(month,day,year); format date mmddyy10.; drop year month day datein; run; *dataset containing monthly: share codes, exchange code, price, ask/high, bid/low, volume, return, shares outstanding; data a_data_monthly; set 'E:\CRSP_monthly_sample.sas7bdat'; run; *daily file containing returns, share code ,exchange code, volume; data a_data_daily; set 'E:\CRSP_daily_sample.sas7bdat'; run; data returns_temp1; set a_data_daily; keep permno date shrcd exchcd vol ret; run; *Input and format daily returns data from CRSP; proc sort data=returns_temp1 out=returns_temp2; by permno date; run; proc delete data=returns_temp1;run; data returns_temp3; set returns_temp2; by permno date; if first.permno and missing(shrcd) then delete; if first.permno and missing(exchcd) then delete; run; proc delete data=returns_temp2;run; *continue time-series of exchange codes and share codes in case of missing data; data returns_temp4; set returns_temp3; by permno date; retain lexchcd lshrcd; if first.permno then do; lexchcd = exchcd; lshrcd = shrcd; end; else do; if missing(exchcd) then exchcd = lexchcd; else lexchcd = exchcd; if missing(shrcd) then exchcd = lshrcd; else lshrcd = shrcd; end; drop lexchcd lshrcd; run; proc delete data=returns_temp3;run; *delete share codes other than 10 and 11 and include only NYSE, AMEX, NASDAQ stocks; data returns_2; set returns_temp4 (rename=(ret=ret_char)); if missing(permno) then delete; else if missing(date) then delete; else if missing(SHRCD) then delete; else if missing(EXCHCD) then delete; else if missing(ret_char) then delete; * else if missing(vol) then delete; else if ret_char = 'C' then delete; else if SHRCD < 10 or SHRCD > 11 then delete; else if EXCHCD < 1 or EXCHCD > 3 then delete; month=month(date); year=year(date); ret = input(ret_char,best.); permno_year_month = catx(',',permno,year,month); drop SHRCD EXCHCD ret_char; run; proc delete data=returns_temp4;run; proc sql; create table returns as select a.*, b.* from returns_2 a inner join a_ffc4_daily b on a.date=b.date; quit; proc delete data=returns_2;run; data returns_2; set returns; mktrf_=mktrf/100; smb_=smb/100; hml_=hml/100; umd_=umd/100; rf_=rf/100; excessret=ret-rf_; drop mktrf smb hml umd rf; run; proc delete data=returns;run; *find MAX in each month; *sort by permno year and month and descending return for each stock so that the first return for each month for each permno is the MAX event; *note, sort by date so that it is the first MAX event for the month in case there is more than one of the same return; proc sort data=returns_2 out=returns_by_pym; by permno_year_month descending ret date; run; *only MAX events remain after this step; data returns_max; set returns_by_pym (rename=(ret=event_ret date=edate)); by permno_year_month; if first.permno_year_month; year_month = catx(',',year,month); drop permno_year_month; run; proc delete data=returns_by_pym;run; proc sort data=returns_max out=returns_max_by_ym; by year_month; run; proc delete data=returns_max; run; *assign deciles; proc rank data=returns_max_by_ym out=max_events groups=10; by year_month; var event_ret; ranks decile; run; proc delete data=returns_max_by_ym; run; data max_events_1; set max_events; if month(edate) = 1 then month_t_n1 = 12; else month_t_n1 = month(edate) - 1; if month(edate) = 1 then year_t_n1 = year(edate) - 1; else year_t_n1 = year(edate); rename month=month_t; rename year=year_t; drop year_month; run; proc delete data=max_events; run; data max_events; set max_events_1; max_month_t=intnx('month',edate,0); format max_month_t mmddyy10.; postmax_month_t_p1 = intnx("month",max_month_t,+1,'same'); format postmax_month_t_p1 mmddyy10.; premax_month_t_n1 = intnx("month",max_month_t,-1,'same'); format premax_month_t_n1 mmddyy10.; run; proc delete data=max_events_1; run; *Input and format monthly returns data from CRSP and FFC4 factors; proc sort data=a_data_monthly out=monthly_returns_t2; by permno date; run; data monthly_returns_t3; set monthly_returns_t2; by permno date; if first.permno and missing(shrcd) then delete; if first.permno and missing(exchcd) then delete; run; proc delete data=monthly_returns_t2; run; *continue time-series of exchange codes and share codes in case of missing data; data monthly_returns_t4; set monthly_returns_t3; by permno date; retain lexchcd lshrcd; if first.permno then do; lexchcd = exchcd; lshrcd = shrcd; end; else do; if missing(exchcd) then exchcd = lexchcd; else lexchcd = exchcd; if missing(shrcd) then exchcd = lshrcd; else lshrcd = shrcd; end; drop lexchcd lshrcd; run; proc delete data=monthly_returns_t3; run; *delete share codes other than 10 and 11 and include only NYSE, AMEX, NASDAQ stocks; data monthly_returns_2; set monthly_returns_t4 (rename=(ret=ret_char)); if missing(permno) then delete; else if missing(date) then delete; else if missing(SHRCD) then delete; else if missing(EXCHCD) then delete; else if missing(ret_char) then delete; else if ret_char = 'C' then delete; if 1 <= EXCHCD <= 3; if 10 <= SHRCD <= 11; ret = input(ret_char,best.); price = abs(prc); month = month(date); year = year(date); day=1; dateff=mdy(month,day,year); format dateff mmddyy10.; keep permno price date vol shrout ret month year dateff; run; *merge fama french factors onto monthly returns so we can compute alpha; proc sql; create table monthly_returns_3 as select a.*, b.mktrf, b.smb, b.hml, b.rf, b.umd, b.st_rev from monthly_returns_2 a inner join a_ffc4_monthly b on a.dateff = b.dateff; quit; proc delete data=monthly_returns_t4; run; proc delete data=monthly_returns_2; run; *create event list; data permnos; set max_events; keep permno; run; proc sort data=permnos; by permno; run; data permnos; set permnos; by permno; if first.permno; run; *market cap and prices; data mv_1; set a_data_daily; keep permno date shrcd exchcd prc shrout; run; data mktcap_price_1; if _n_=1 then do; if 0 then set permnos; declare hash h(dataset:'permnos'); h.definekey('permno'); h.definedone(); end; set mv_1; if h.find()=0; run; proc delete data=mv_1;run; data mktcap_price_2; set mktcap_price_1 (where=(not missing(prc) and not missing(shrout))); price = abs(prc); market_cap=price * shrout; year=year(date); month=month(date); max_month_t=intnx('month',date,0); format max_month_t mmddyy10.; permno_year_month = catx(',',permno,year,month); run; proc delete data=mktcap_price_1;run; proc sort data=mktcap_price_2; by permno_year_month descending date; run; data mktcap_price; set mktcap_price_2; by permno_year_month; if first.permno_year_month; run; proc delete data=mktcap_price_2;run; *book-to-market; data a_data_bm; set 'E:\Compustat_BV_sample.sas7bdat'; run; data bv_1; set a_data_bm (rename=(lpermno=permno)); if month(datadate) ^= fyr then delete; if not missing(SEQ) then SHE=SEQ; else if not missing(CEQ) then SHE=CEQ+PSTK; else if not missing(AT) and not missing(LT) then SHE=AT-LT; if missing(TXDITC) then TXDITC=0; if curcd ^= 'USD' then delete; cal_year_end = fyear; run; data bv_2; set bv_1; if not missing(PSTKRV) then BE=SHE-PSTKRV+TXDITC; else if not missing(PSTKL) then BE=SHE-PSTKL+TXDITC; else if not missing(PSTK) then BE=SHE-PSTK+TXDITC; else BE=SHE; run; data bv_3; set bv_2; if missing(BE) then delete; permnoyear=catx(',',permno,cal_year_end); run; proc sort data=bv_3 noduprecs; by permnoyear descending datadate; run; data bv_4; set bv_3; by permnoyear; if first.permnoyear; run; data mv_bm_1; set mktcap_price; permno_year=catx(',',permno,year); run; proc sort data=mv_bm_1 out=mv_bm_2; by permno_year descending date; run; data mv_bm_3; set mv_bm_2; by permno_year; if first.permno_year; if month(date) ^= 12 then delete; cal_year_end = year(date); run; proc sql; create table bm_1 as select a.*, b.prc, b.shrout from bv_4 a inner join mv_bm_3 b on a.permno = b.permno and a.cal_year_end = b.cal_year_end; quit; data bm_2; set bm_1; if not missing(shrout) then market_cap=abs(prc) * shrout / 1000; *in millions; else if not missing(CSHO) then market_cap=abs(prc) * shrout; *in millions; run; data bm_3; set bm_2; if missing(permno) then delete; else if missing(be) then delete; else if missing(market_cap) then delete; else if missing(cal_year_end) then delete; bm = be / (market_cap); if bm<0 then delete; keep bm permno cal_year_end; run; proc sort data=bm_3 noduprecs; by cal_year_end; run; proc univariate data=bm_3 noprint; var bm; output out=bm_P00_5_P99_5 pctlpre=P_ pctlpts = 0.5 99.5; run; proc sql; create table bm_4 as select a.*, b.P_0_5, b.P_99_5 from bm_3 a inner join bm_P00_5_P99_5 b on 1=1; quit; data bm_5; set bm_4; if bm < P_0_5 then bm = P_0_5; if bm > P_99_5 then bm = P_99_5; run; data bm; set bm_5; * if bm < 0 then delete; drop P_0_5 P_99_5; run; proc delete data=bm_1;run; proc delete data=bm_2;run; proc delete data=bm_3;run; proc delete data=bm_4;run; proc delete data=bm_5;run; proc delete data=bm_p00_5_p99_5;run; proc delete data=bv_1;run; proc delete data=bv_2;run; proc delete data=bv_3;run; proc delete data=bv_4;run; proc delete data=mv_bm_1;run; proc delete data=mv_bm_2;run; proc delete data=mv_bm_3;run; *illiquidity; data h_l_open_close; set a_data_daily; keep permno date shrcd exchcd prc openprc askhi bidlo; run; data illiq_1; set h_l_open_close; if 1 <= EXCHCD <= 3; if 10 <= SHRCD <= 11; year = year(date); month = month(date); run; proc delete data=h_l_open_close;run; proc sql; create table illiq_2 as select a.* from illiq_1 a inner join max_events b on a.permno=b.permno and a.year=b.year_t and a.month=b.month_t; quit; proc delete data=illiq_1;run; proc sql; create table illiq_3 as select a.permno, a.date, a.bidlo, a.askhi, a.prc, a.openprc, b.ret, b.vol from illiq_2 a inner join returns_2 b on a.permno=b.permno and a.date=b.date; quit; proc delete data=illiq_2;run; data illiq_4; set illiq_3 (where=(vol > 0)); prc_abs=abs(prc); bidlo_abs=abs(bidlo); askhi_abs=abs(askhi); openprc_abs=abs(openprc); ret_abs=abs(ret); if prc < 0 then average_price = mean(bidlo_abs,askhi_abs,openprc_abs); else average_price = mean(prc_abs,bidlo_abs,askhi_abs,openprc_abs); run; proc delete data=illiq_3;run; data illiq_5; set illiq_4; vold = average_price * vol; illiq_daily = ret_abs / vold; year=year(date); month=month(date); keep permno date vold ret_abs illiq_daily year month; run; proc delete data=illiq_4;run; proc sort data=illiq_5 out=illiq_6; by permno year month; run; proc delete data=illiq_5;run; proc means data=illiq_6 noprint; by permno year month; var illiq_daily; output out=illiq_7 sum= n= / autoname; run; proc delete data=illiq_6;run; data illiq_8; set illiq_7; illiq = illiq_daily_sum / illiq_daily_n; keep permno year month illiq; run; proc delete data=illiq_7;run; proc univariate data=illiq_8 noprint; var illiq; output out=illiq_P1_P99 pctlpre=P_ pctlpts = 1 99; run; proc sql; create table illiq_9 as select a.*, b.P_1, b.P_99 from illiq_8 a inner join illiq_P1_P99 b on 1=1; quit; data illiq; set illiq_9; if illiq < p_1 then delete; if illiq > p_99 then delete; run; proc delete data=illiq_8;run; proc delete data=illiq_9;run; proc delete data=illiq_P1_P99;run; *momentum and reversal; %let J=11; *create momentum measures based on past (J) month compounded returns; *make sure to keep stocks with available return info in the formation period; proc printto log=junk; proc expand data=monthly_returns_3 (keep=permno date ret) out=umd method=none; by permno; id date; convert ret = momentum / transformin=(+1) transformout=(MOVPROD &J -1 trimleft 10); quit; proc printto; run; data mom; set umd; if not missing(momentum); enddate=intnx('month',date,0); format enddate mmddyy10.; max_month = intnx("month",enddate,+1,'same'); format max_month mmddyy10.; keep permno momentum max_month; run; data rev; set umd; rename ret=reversal; max_month=intnx('month',date,0); format max_month mmddyy10.; run; proc sql; create table mom_rev as select a.permno, a.max_month, a.momentum, b.reversal from mom a inner join rev b on a.permno=b.permno and a.max_month=b.max_month; quit; proc delete data=mom;run; proc delete data=rev;run; proc delete data=umd;run; *skewness; *iskew over one year; data skewcalc_1yr; set max_events; keep permno startofmonth startofnextmonth yearago edate month_t year_t; startofmonth=max_month_t; format startofmonth mmddyy10.; startofnextmonth=postmax_month_t_p1; format startofnextmonth mmddyy10.; yearago=intnx('year',startofnextmonth,-1,'same'); format yearago mmddyy10.; run; proc sort data=skewcalc_1yr noduprecs; by permno; run; %macro skewness_calculation1yr; %do i=1 %to 12; data skewcalc_temp; set skewcalc_1yr (where=(month_t=&i)); run; proc sql; create table skewcalc_3 as select a.permno, a.edate, b.date, b.excessret, b.mktrf_, b.smb_, b.hml_, b.umd_, b.ret from skewcalc_temp a inner join returns_2 b on a.permno=b.permno and a.yearago <= b.date < a.startofnextmonth order by permno, edate; quit; proc delete data=skewcalc_temp; run; proc reg data=skewcalc_3 outest=sskew_1 noprint; by permno edate; model excessret = mktrf_ smb_ hml_ umd_; output out=iskew_1 r=r; quit; proc delete data=skewcalc_3; run; proc means data=iskew_1 noprint; by permno edate; var r; output out=iskew_var_1 skew=iskew; run; proc delete data=iskew_1; run; data iskew_var_end_&i; set iskew_var_1 (where=(_FREQ_>=200)); drop _TYPE_ _freq_; run; proc delete data=iskew_var_1; run; %end; data iskew_var_all; set iskew_var_end_1-iskew_var_end_12; run; %mend; %skewness_calculation1yr; %macro deletetemps; %do i=1 %to 12; proc delete data=iskew_var_end_&i; run; %end; %mend; %deletetemps; proc delete data=skewcalc_1yr;run; proc delete data=sskew_1;run; *volatility; *ivol over 3 months (quarter year); data volcalc_qtr; set max_events; keep permno startofmonth startofnextmonth qtrago edate month_t year_t; startofmonth=max_month_t; format startofmonth mmddyy10.; startofnextmonth=postmax_month_t_p1; format startofnextmonth mmddyy10.; qtrago=intnx('month',startofnextmonth,-3,'same'); format qtrago mmddyy10.; run; proc sort data=volcalc_qtr noduprecs; by permno; run; %macro volatility_calculationqtr; %do i=1 %to 12; data volcalc_temp; set volcalc_qtr (where=(month_t=&i)); run; proc sql; create table volcalc_3 as select a.permno, a.edate, b.date, b.excessret, b.mktrf_, b.smb_, b.hml_, b.umd_, b.ret from volcalc_temp a inner join returns_2 b on a.permno=b.permno and a.qtrago <= b.date < a.startofnextmonth order by permno, edate; quit; proc delete data=volcalc_temp; run; proc reg data=volcalc_3 outest=svol_1 noprint; by permno edate; model excessret = mktrf_ smb_ hml_ umd_; output out=ivol_1 r=r; quit; proc delete data=volcalc_3; run; proc means data=ivol_1 noprint; by permno edate; var r; output out=ivol_var_1 std=ivol; run; proc delete data=ivol_1; run; data ivol_var_end_&i; set ivol_var_1 (where=(_FREQ_>=50)); drop _TYPE_ _freq_; run; proc delete data=ivol_var_1; run; %end; data ivol_var_all; set ivol_var_end_1-ivol_var_end_12; run; %mend; %volatility_calculationqtr; %macro deletetemps; %do i=1 %to 12; proc delete data=ivol_var_end_&i; run; %end; %mend; %deletetemps; proc delete data=volcalc_qtr;run; proc delete data=svol_1;run; *combine results; proc sql; create table combined_1 as select a.permno, a.edate, a.max_month_t, a.postmax_month_t_p1, a.decile, a.event_ret, b.price, b.market_cap from max_events a inner join mktcap_price b on a.permno=b.permno and a.max_month_t=b.max_month_t; quit; proc sql; create table combined_2 as select a.*, b.ivol from combined_1 a inner join ivol_var_all b on a.permno=b.permno and a.edate=b.edate; quit; proc sql; create table combined_3 as select a.*, b.iskew from combined_2 a inner join iskew_var_all b on a.permno=b.permno and a.edate=b.edate; quit; proc sql; create table combined_4 as select a.*, b.illiq from combined_3 a inner join illiq b on a.permno=b.permno and month(a.edate)=b.month and year(a.edate)=b.year; quit; proc sql; create table combined_5 as select a.*, b.momentum, b.reversal from combined_4 a inner join mom_rev b on a.permno=b.permno and a.max_month_t=b.max_month; quit; data combined_6; set combined_5; last_year=year(edate)-1; run; proc sql; create table combined_7 as select a.*, b.bm from combined_6 a inner join bm b on a.permno=b.permno and a.last_year=b.cal_year_end; quit; proc sort data=combined_7 noduprecs; by permno edate; run; data combined_8; retain permno edate max_month_t decile max mcap price bm illiq ivol_ iskew rev mom; set combined_7; max=event_ret*100; mcap=market_cap/1000; illiq=illiq*(10**5); ivol_=ivol*100; rev=reversal*100; mom=momentum*100; drop event_ret market_cap ivol momentum reversal last_year; run; *semi-annual periods for expected hi-max prediction; data combined_9; set combined_8; log_price=log(price); log_size=log(mcap); if month(edate) < 7 then halfyear=0; if month(edate) > 6 then halfyear=5; period=year(edate)*10 + halfyear; run; proc sort noduprecs data=combined_9 out=combined; by permno edate; run; %macro deletetemps; %do i=1 %to 9; proc delete data=combined_&i; run; %end; %mend; %deletetemps; *re-ranking for high max due to events lost along the way due to missing data; data decilecalc_1; set combined; keep permno edate max max_month_t postmax_month_t_p1; run; proc sort data=decilecalc_1 noduprecs; by max_month_t; run; proc rank data=decilecalc_1 out=new_deciles groups=10; by max_month_t; var max; ranks decile; run; proc sql; create table combined_2 as select a.*, b.decile from combined (drop=decile) a inner join new_deciles b on a.permno=b.permno and a.max_month_t=b.max_month_t; quit; data elist_all_dums; set new_deciles; if decile=9 then highd=1; else highd=0; run; proc sql; create table combined_3 as select a.*, b.highd_t_p1, b.max_t_p1, b.edate_t_p1 from combined_2 a inner join elist_all_dums (rename=(highd=highd_t_p1 max=max_t_p1 edate=edate_t_p1)) b on a.permno=b.permno and a.postmax_month_t_p1=b.max_month_t; quit; data combined_new; set combined_3; if year(edate) < 1962 then delete; if year(edate) = 1962 and month(edate) < 6 then delete; if year(edate) > 2005 then delete; if year(edate) = 2005 and month(edate) > 10 then delete; run; proc delete data=combined_2;run; proc delete data=combined_3;run; *expected hi-max prediction model; %macro predictions (decile, where, savename, input); %do i=0 %to 82; data templogit; set &input (where=(period <= (19640+5*&i))); run; proc logistic data=templogit outest=templogit2; model highd_t_p1(event='1')=log_size bm mom rev illiq ivol_ iskew log_price; run; data probit_ests_&i; set templogit2; period=19640+5*&i; run; proc delete data=templogit templogit2; run; %end; data probit_ests_nodum; set probit_ests_0-probit_ests_82; periodusedin=period+5; run; run; %mend; %predictions (decile_t_p1, , entiresample, combined_new); %macro deletetemps; %do i=0 %to 82; proc delete data=probit_ests_&i; run; %end; %mend; %deletetemps; proc delete data=decilecalc_1;run; proc delete data=elist_all_dums;run; *for Table 2; proc means data=probit_ests_nodum noprint; output out=z_Table2; run; proc sql; create table prediction_1_nd as select a.*, b.* from combined_new a inner join probit_ests_nodum (drop=period rename=(log_size=coeff_log_size bm=coeff_bm mom=coeff_mom rev=coeff_rev illiq=coeff_illiq ivol_=coeff_ivol iskew=coeff_iskew log_price=coeff_log_price)) b on a.period=b.periodusedin; quit; data prediction_2_nd; set prediction_1_nd; ehi=1 / (1+ exp(-(intercept + log_size*coeff_log_size + bm*coeff_bm + mom*coeff_mom + rev*coeff_rev + illiq*coeff_illiq + ivol_*coeff_ivol + iskew*coeff_iskew + log_price*coeff_log_price))); run; proc sort noduprecs data=prediction_2_nd; by max_month_t; run; proc rank data=prediction_2_nd out=prediction_3_nd groups=10; by max_month_t; var ehi; ranks ehideciles; run; *4 portfolio classifications: 1) expected and actual hi-max, 2) expected non-hi-max but actual hi-max; *3) expected hi-max but actual non-hi-max, 4) expected and actual non-hi-max; data prediction_4_nd; set prediction_3_nd; if highd_t_p1=1 and ehideciles=9 then ehi_portnum=1; if highd_t_p1=1 and ehideciles^=9 then ehi_portnum=2; if highd_t_p1^=1 and ehideciles=9 then ehi_portnum=3; if highd_t_p1^=1 and ehideciles^=9 then ehi_portnum=4; postmax_month_t_p2 = intnx("month",max_month_t,+2,'same'); format postmax_month_t_p2 mmddyy10.; drop _LINK_ _TYPE_ _STATUS_ _NAME_ _LNLIKE_ _ESTTYPE_; run; proc sort data=prediction_4_nd noduprecs; by ehi_portnum; run; proc delete data=prediction_1_nd;run; proc delete data=prediction_2_nd;run; proc delete data=prediction_3_nd;run; *merg returns and factors onto event file; *max_month_t is the month of the control variables (similar to BCW); *postmax_month_t_p1 is the month following this, this is the event month and the month we are predicting expected hi max; *postmax_month_t_p2 is the following month, i.e. the month the portfolio is held; data monthly_returns_4; set monthly_returns_3; ret_=ret*100; run; proc delete data=monthly_returns_3;run; proc sql; create table portfolios_1 as select a.*, b.ret_, b.rf, b.mktrf, b.smb, b.hml, b.umd, b.st_rev from prediction_4_nd a inner join monthly_returns_4 b on a.permno=b.permno and month(a.postmax_month_t_p2)=b.month and year(a.postmax_month_t_p2)=b.year; quit; data portfolios_2; set portfolios_1; if missing(permno) then delete; retxrf=ret_-rf; run; proc sort noduprecs data=portfolios_2 out=full_sample; by permno edate; run; proc delete data=portfolios_1;run; proc delete data=portfolios_2;run; *expected hi max prediction success; proc sort data=prediction_4_nd; by ehi_portnum postmax_month_t_p1; run; proc means data=prediction_4_nd noprint; class ehi_portnum postmax_month_t_p1; var ehi_portnum; output out=himax_count_nd; run; data himax_count_nd; set himax_count_nd; if _TYPE_ = 3; if _STAT_ = "N"; drop _TYPE_ _STAT_; run; *Table 4 results: macro to calculate alphas and sharpe ratios for each of the different portfolios in addition to the differences between portfolios; %macro table_4_alphas_sharpes (weighting, type); proc sort noduprecs data=full_sample out=temp1; by ehi_portnum postmax_month_t_p2; run; *find the total market cap for all each portfolio for each month so we can find the weight for each stock in each portfolio; proc means data=temp1 noprint; var mcap; by ehi_portnum postmax_month_t_p2; output out=temp2 sum=sumcap; run; *join this total market cap back onto our dataset; proc sql; create table temp3 as select a.*, b.sumcap from temp1 a inner join temp2 b on a.postmax_month_t_p2=b.postmax_month_t_p2 and a.ehi_portnum = b.ehi_portnum; quit; *weight is the market cap of a firm / total market cap in portfolio; data temp4; set temp3; weight = mcap / sumcap; run; proc sort noduprecs data=temp4 out=temp5; by ehi_portnum postmax_month_t_p2; run; *if we have value-weighted portfolios, weight the portfolios by weight that we have calculated; %if &weighting='vw' %then %do; proc means data=temp5 noprint; by ehi_portnum postmax_month_t_p2; weight weight; var ret_ retxrf mktrf hml smb umd st_rev; output out=temp6 mean=; run; %end; *if we have equal-weighted portfolios, we do not need to weight the mean; %if &weighting='ew' %then %do; proc means data=temp5 noprint; by ehi_portnum postmax_month_t_p2; var ret_ retxrf mktrf hml smb umd st_rev; output out=temp6 mean=; run; %end; proc sort noduprecs data=temp6 out=temp7; by ehi_portnum; run; *alpha and nw t stat 12 lags; ods output parameterestimates=temp8; ods listing close; proc model data=temp7; endo retxrf; exog mktrf smb hml umd st_rev; instruments _exog_; parms b0 b1 b2 b3 b4 b5; by ehi_portnum; retxrf=b0 + b1*mktrf + b2*smb + b3*hml + b4*umd + b5*st_rev; fit retxrf / gmm kernel=(bart,13,0) vardef=n; run; quit; *sharpe ratio; proc means data=temp7 noprint; by ehi_portnum; var retxrf; output out=sr_1 mean= std= / autoname; quit; data alpha_t1; set temp8; if parameter='b0'; run; proc sql; create table tb_4_t1 as select a.*, b.alpha, b.alpha_t from sr_1 a inner join alpha_t1 (rename=(estimate=alpha tValue=alpha_t)) b on a.ehi_portnum=b.ehi_portnum; quit; *format table; data temp9; retain order ehi_portnum estimate tvalue; set temp8; if ehi_portnum=2 then order=2; else if ehi_portnum=3 then order=3; else order=ehi_portnum; if parameter='b0'; drop esttype stderr probt df parameter; run; proc sort data=temp9; by order; run; proc transpose data=temp9 out=temp10; id order; run; quit; data temp; set temp10; drop _LABEL_; run; *create longshort portfolio; *create a long_dummy indicating that we are long expHiMAX & actHiMAX (long_dummy =1) short expHiMAX & actNonHiMAX (long_dummy =0); data longshort_1; set temp6 (where=(ehi_portnum=1 or ehi_portnum=2)); if ehi_portnum=1 then long_dummy=1; else long_dummy=0; d_mktrf=long_dummy*mktrf; d_smb=long_dummy*smb; d_hml=long_dummy*hml; d_umd=long_dummy*umd; d_st_rev=long_dummy*st_rev; run; *create longshort portfolio; *create a long_dummy indicating that we are long expNonHiMAX & actHiMAX (long_dummy =1) short expNonHiMAX & actNonHiMAX (long_dummy =0); data longshort_2; set temp6 (where=(ehi_portnum=3 or ehi_portnum=4)); if ehi_portnum=3 then long_dummy=1; else long_dummy=0; d_mktrf=long_dummy*mktrf; d_smb=long_dummy*smb; d_hml=long_dummy*hml; d_umd=long_dummy*umd; d_st_rev=long_dummy*st_rev; run; proc sort noduprecs data=longshort_1; by postmax_month_t_p2; run; proc sort noduprecs data=longshort_2; by postmax_month_t_p2; run; *alpha and nw t stat; ods output parameterestimates=longshort_3; ods listing close; proc model data=longshort_1; endo retxrf; exog long_dummy d_mktrf d_smb d_hml d_umd d_st_rev mktrf smb hml umd st_rev; instruments _exog_; parms b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11; retxrf=b0 +b1*long_dummy + b2*mktrf + b3*smb + b4*hml + b5*umd + b6*d_mktrf+ b7*d_smb+ b8*d_hml + b9*d_umd +b10*d_st_rev +b11*st_rev; fit retxrf / gmm kernel=(bart,13,0) vardef=n; run; quit; *alpha and nw t stat; ods output parameterestimates=longshort_4; ods listing close; proc model data=longshort_2; endo retxrf; exog long_dummy d_mktrf d_smb d_hml d_umd d_st_rev mktrf smb hml umd st_rev; instruments _exog_; parms b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11; retxrf=b0 +b1*long_dummy + b2*mktrf + b3*smb + b4*hml + b5*umd + b6*d_mktrf+ b7*d_smb+ b8*d_hml + b9*d_umd +b10*d_st_rev +b11*st_rev; fit retxrf / gmm kernel=(bart,13,0) vardef=n; run; quit; *formatting and joining datasets together to form table; data ls_left_1; retain order estimate tvalue; set longshort_3; order=1; if parameter = 'b1'; keep order estimate tvalue; run; data ls_right_1; retain order estimate tvalue; set longshort_4; order=2; if parameter = 'b1'; keep order estimate tvalue; run; data ls_combined_1; set ls_left_1 ls_right_1; run; proc transpose data=ls_combined_1 out=ls_combined_2; id order; run; quit; data ls_combined; set ls_combined_2(rename=(_1=_2x1 _2=_4x3)); run; proc sql; create table temporder1 as select a._NAME_, a._1, a._2, b._2x1, a._3, a._4, b._4x3 from temp a left join ls_combined b on a._NAME_=b._NAME_; quit; proc transpose data=temporder1 out=temporder2; run; data temporder3; retain _NAME_ ehi_portnum estimate tvalue; set temporder2; run; proc transpose data=temporder3 out=tb4_&type; run; proc means data=temp7 noprint; by ehi_portnum; var retxrf; output out=tb4_sr_&type mean= std= / autoname; quit; data alpha_t1; set temp8; if parameter='b0'; run; proc sql; create table tb_4_t1 as select a.*, b.alpha, b.alpha_t from sr_1 a inner join alpha_t1 (rename=(estimate=alpha tValue=alpha_t)) b on a.ehi_portnum=b.ehi_portnum; quit; data sr_longshort_1x2; set temp6 (where=(ehi_portnum=1 or ehi_portnum=2)); longshort='1x2'; if ehi_portnum = 1 then portret=ret_; else portret=-ret_; rf = ret_-retxrf; portretxrf=portret-rf; ehi_portnum=0; run; data sr_longshort_3x4; set temp6 (where=(ehi_portnum=3 or ehi_portnum=4)); longshort='3x4'; if ehi_portnum = 3 then portret=ret_; else portret=-ret_; rf = ret_-retxrf; portretxrf=portret-rf; ehi_portnum=0; run; data sr_longshort_1x3; set temp6 (where=(ehi_portnum=1 or ehi_portnum=3)); longshort='1x3'; if ehi_portnum = 1 then portret=ret_; else portret=-ret_; rf = ret_-retxrf; portretxrf=portret-rf; ehi_portnum=0; run; data sr_longshort_2x4; set temp6 (where=(ehi_portnum=2 or ehi_portnum=4)); longshort='2x4'; if ehi_portnum = 2 then portret=ret_; else portret=-ret_; rf = ret_-retxrf; portretxrf=portret-rf; ehi_portnum=0; run; data sr_longshort_0; set sr_longshort_1x2 sr_longshort_3x4 sr_longshort_1x3 sr_longshort_2x4 temp6; run; data sr_longshort_1; set sr_longshort_0; if missing(longshort) then longshort = ehi_portnum; if missing(portret) then portret = retxrf; run; proc sort data=sr_longshort_1; by longshort postmax_month_t_p2; run; proc means data=sr_longshort_1 noprint; by longshort postmax_month_t_p2; var portret; output out=sr_longshort_2 sum=; run; proc sql; create table sr_longshort_3 as select a.*, b.mktrf , b.smb , b.hml , b.umd , b.st_rev from sr_longshort_2 a inner join temp7 (where=(ehi_portnum=1)) b on a.postmax_month_t_p2=b.postmax_month_t_p2; quit; proc means data=sr_longshort_2 noprint; by longshort; var portret; output out=tb4_longshort_sharpe mean= std= / autoname; run; proc sort data=sr_longshort_3 noduprecs; by longshort; run; ods output parameterestimates=sr_longshort_4; ods listing close; proc model data=sr_longshort_3; by longshort; endo portret; exog mktrf smb hml umd st_rev; instruments _exog_; parms b0 b1 b2 b3 b4 b5; portret=b0 +b1*mktrf + b2*smb + b3*hml + b4*umd + b5*st_rev; fit portret / gmm kernel=(bart,13,0) vardef=n; run; quit; data tb4_longshort_alphas; set sr_longshort_4; if parameter = 'b0'; run; data z_table4_alpha_&type; set tb4_longshort_alphas; run; data z_table4_sharpe_&type; set tb4_longshort_sharpe; sharpe_ratio=portret_mean/portret_stddev; run; proc delete data=temp1-temp10 ls_combined ls_combined_1 ls_combined_2; run; proc delete data=temp longshort_1-longshort_4 ls_right_1 ls_left_1 temporder1-temporder3; run; proc delete data=tb4_longshort_sharpe tb4_longshort_alphas sr_longshort_0-sr_longshort_4 sr_longshort_1x2 sr_longshort_3x4 sr_longshort_1x3 sr_longshort_2x4; run; proc delete data=alpha_t1 sr_1 tb_4_t1; run; proc delete data=tb4_&type tb4_sr_&type; run; %mend; %table_4_alphas_sharpes ('vw', vw); %table_4_alphas_sharpes ('ew', ew);