/******************** PCCM.SAS ********************/
/********************Author: Rui Dai********************/
proc sql;
create table Secm (where=(prccm>0)) as
select distinct a.gvkey, a.iid, intnx('month',datadate,0,'e') as datadate format=date9.,
round(prccm, 0.01) as prccm
from comp.Secm(where=(lengthn(strip(iid))=2))/*USA Firm Only*/ as a
inner join (
select distinct gvkey from comp.funda
where sale>. or at>. /*Firm with Sales or Total Asset only*/
) as b
on a.gvkey=b.gvkey;
quit;
/*CRSP Monthly Security File Cleansing*/
proc sql;
create table msf(where=(prc>0)) as
select distinct a.permno, intnx('month',date,0,'e') as date format=date9.,
round(abs(prc), 0.01) as prc
from crsp.msf as a inner join crsp.msenames(where=(shrcd in (10 11 12 30 31 32))) as b
on a.permno=b.permno;
quit;
/*CUSIP Matching*/
proc sql;
create table cusip_match as
select distinct a.gvkey, a.iid, b.permno
from comp.security as a inner join crsp.msenames as b
on substr(a.cusip,1,8)=substr(b.ncusip,1,8)
inner join (select distinct permno from msf) as c
on b.permno=c.permno
inner join (select distinct gvkey, iid from secm) as d
on a.gvkey=d.gvkey and a.iid=d.iid;
quit;
/*A Blind Price Matching*/
proc sql;
create table _pccm_ as
select distinct a.gvkey, permno, a.iid
from Secm as a inner join msf as b
on datadate=date and prc>0.5 and prc=prccm /*Removing very long price matching*/
group by b.permno, a.gvkey, a.iid
having count(distinct datadate)> 7 ; /*Parameter 2*/
quit;
/*Refine matching with time dimension*/
proc sql;
create table meta as
select distinct a.permno, intnx('month',date,0,'e') as DATE format=date9.,
b.gvkey, b.iid, prc, prccm, prc>0.5 and prc=prccm as sdum /*Removing very long price matching*/
from msf as a inner join _pccm_ as b
on a.permno=b.permno
inner join Secm as c
on b.gvkey=c.gvkey and b.iid=c.iid and datadate=date
order by permno, gvkey, iid, date;
quit;
/*Counting backwards to fine-tuning the matched point*/
PROC EXPAND data=meta OUT=meta METHOD=NONE;
by permno gvkey iid;
CONVERT sdum = MS_sdum / TRANSFORM=(MOVSUM 15); /*Parameter 2*/
id date;
RUN;
/* Counting forwards for fine-tuning the matched point (Three steps:)*/
proc sort data=meta out=meta; by permno gvkey iid descending date; run;
data meta; set meta; by permno gvkey iid descending date;
idx+1;
if first.iid then idx=1;
run;
PROC EXPAND data=meta OUT=meta METHOD=NONE;
by permno gvkey iid;
CONVERT MS_sdum = MS_mdum / TRANSFORM=(MOVmax 15); /*Parameter 2*/
id idx;
RUN;
/*Keep consequent matched price and date points*/
proc sql undo_policy=none;
create table meta as
select distinct *, sum(sdum) as hcnt "historical matched count"
from meta(drop=idx)
where MS_mdum>=1 and MS_sdum>=1
group by permno, gvkey, iid
having max(ms_sdum)> 7 /*Parameter 2*/
order by permno, gvkey, iid, date;
quit;
/*Among the remained matched ones, count the closest spelling distance*/
proc sql;
create table mname_match as
select distinct a.gvkey, a.iid, a.permno, spedis(upcase(compress(COMNAM,,'sp')), upcase(compress(conm,,'sp'))) as spedis
from (select distinct gvkey, iid, permno from meta) as a
left join crsp.msenames as b
on a.permno=b.permno
left join comp.company as c
on a.gvkey=c.gvkey
group by a.permno, a.gvkey, a.iid
having spedis(COMNAM, conm)=max(spedis(COMNAM, conm));
quit;
/*Among the remained matched ones, check whether same tickers are evered shared*/
proc sql;
create table tick_match as
select distinct a.gvkey, a.iid, a.permno, scan(upcase(b.ticker),1) = scan(upcase(c.tic),1) as tmatched
from (select distinct gvkey, iid, permno from meta) as a
left join crsp.msenames(where=(lengthn(TICKER)>0)) as b
on a.permno=b.permno
left join comp.Security(where=(lengthn(tic)>0)) as c
on a.gvkey=c.gvkey and a.iid=c.iid
group by a.permno, a.gvkey, a.iid
having (scan(upcase(b.ticker),1) = scan(upcase(c.tic),1))
=max(scan(upcase(b.ticker),1) = scan(upcase(c.tic),1));
quit;
/*Matched name and ticker measures back to consequent matching results and
construct key monthly statistics*/
proc sql;
create table _pccm_ as
select distinct a.*, max(hcnt) as mhcnt, max(MS_sdum) as mms_sdum, max(ms_mdum) as mms_mdum,
count(distinct catt(a.gvkey, a.iid)) as gcnt, b.permno>. as cusip_matched, spedis,
max(b.permno>.) as mcusip_matched, tmatched, max(tmatched) as mtmatched
from meta as a left join cusip_match as b
on a.permno=b.permno and a.gvkey=b.gvkey and a.iid=b.iid
left join mname_match as c
on a.permno=c.permno and a.gvkey=c.gvkey and a.iid=c.iid
left join Tick_match as d
on a.permno=d.permno and a.gvkey=d.gvkey and a.iid=d.iid
group by a.permno, date
order by a.permno, date, a.gvkey, a.iid;
quit;
/*Cleansing procedure:
Among multiple matches, keep the ones with either cusip matched or ticker matched
For the remainig ones, some random statistics are used to remove wrong matched results*/
data _pccm_; set _pccm_;
if gcnt>1 and mcusip_matched=1 and cusip_matched^=1 then delete; /*Keep CUSIP Matched Only*/
else if gcnt>1 and mtmatched=1 and tmatched^=1 then delete; /*Keep CUSIP Matched Only*/
if mcusip_matched=0 and mtmatched=0 and spedis>10 and MS_mdum<=3 and hcnt<5 then delete;/*Keep the ones with similar names but matched only a few times - Value decided by observations*/
else if mcusip_matched=0 and mtmatched=0 and spedis>30 and hcnt<15 then delete;/*Keep the ones with different names but with a lot matches - Value decided by observations*/
drop mhcnt mms_sdum mms_mdum gcnt mcusip_matched mtmatched;
run;
/*To have the best one-to-one (permno to gvkey iid) match*/
proc sql;
create table pccm as
select distinct *
from (
select distinct *
from (
select distinct *
from (
select distinct *
from _pccm_
group by permno, date
having hcnt=max(hcnt) /*1) among the current best, get the best historical best*/
)
group by permno, date
having MS_sdum=max(MS_sdum) /*2) at given day, get the best matches*/
)
group by gvkey, iid, date
having hcnt=max(hcnt)
)
group by gvkey, iid, date
having MS_sdum=max(MS_sdum) ;
quit;
/*Patching based on observations */
proc sql undo_policy=none;
create table pccm as
select distinct *
from (
select distinct *
from pccm
group by permno, gvkey, date
having iid=min(iid)
)
group by permno, date
having spedis=min(spedis)
order by permno, date;
quit;
/*Add Time dimensions to PERMNO_to_IID through CUSIP*/
proc sql;
create table cusip_match_valid as
select distinct a.permno, a.date, b.gvkey, b.iid
from msf as a inner join Cusip_match as b
on a.permno=b.permno
inner join (
select distinct gvkey, datadate from comp.funda
where sale>. or at>.
) as c
on b.gvkey=c.gvkey and
intnx('month', datadate, -24, 'e')<a.date<=intnx('month', datadate, 12, 'e')
left join pccm as d
on a.permno=d.permno and b.gvkey=d.gvkey and b.iid=d.iid and a.date=d.date
group by a.permno, a.date
having (d.permno>.)=max(d.permno>.) and count(distinct catt(gvkey,iid))=1;
quit;
/*Combine CUSIP and PRICE match*/
proc sql;
create table _link_table_ as
select distinct a.permno, a.date, b.gvkey, b.iid,
c.gvkey as cusip_gvkey, c.iid as cusip_iid
from msf as a left join pccm as b
on a.permno=b.permno and a.date=b.date
left join cusip_match_valid as c
on a.permno=c.permno and a.date=c.date
order by permno, gvkey, iid, date;
quit;
/*Making a range (two steps)*/
data _link_table_(where=(lengthn(gvkey)>0)); set _link_table_;
if gvkey="" and iid="" then do;
gvkey=cusip_gvkey; iid=cusip_iid;
end;
drop cusip_gvkey cusip_iid;
proc sort; by permno gvkey iid date;
run;
data _link_table_; set _link_table_; by permno gvkey iid date;
retain from;
ldate=lag(date);
if first.iid then do; ldate=.; from=date; end;
else if intnx('month',ldate, 1, 'e')<intnx('month',date, 0, 'e') then
from=date;
format from date9.;
run;
/*Save the matched Table*/
proc sql;
create table pccm_table as
select distinct permno, gvkey, iid, from, date as to format=date9.
from _link_table_
group by permno, gvkey, iid, from
having date=max(date);
quit;