Rui Dai - CRSP-Compustat (2024)

/******************** PCCM.SAS ********************/

/********************Author: Rui Dai********************/

proc sql;

create table Secm (where=(prccm>0)) as

select distinct a.gvkey, a.iid, intnx('month',datadate,0,'e') as datadate format=date9.,

round(prccm, 0.01) as prccm

from comp.Secm(where=(lengthn(strip(iid))=2))/*USA Firm Only*/ as a

inner join (

select distinct gvkey from comp.funda

where sale>. or at>. /*Firm with Sales or Total Asset only*/

) as b

on a.gvkey=b.gvkey;

quit;

/*CRSP Monthly Security File Cleansing*/

proc sql;

create table msf(where=(prc>0)) as

select distinct a.permno, intnx('month',date,0,'e') as date format=date9.,

round(abs(prc), 0.01) as prc

from crsp.msf as a inner join crsp.msenames(where=(shrcd in (10 11 12 30 31 32))) as b

on a.permno=b.permno;

quit;

/*CUSIP Matching*/

proc sql;

create table cusip_match as

select distinct a.gvkey, a.iid, b.permno

from comp.security as a inner join crsp.msenames as b

on substr(a.cusip,1,8)=substr(b.ncusip,1,8)

inner join (select distinct permno from msf) as c

on b.permno=c.permno

inner join (select distinct gvkey, iid from secm) as d

on a.gvkey=d.gvkey and a.iid=d.iid;

quit;

/*A Blind Price Matching*/

proc sql;

create table _pccm_ as

select distinct a.gvkey, permno, a.iid

from Secm as a inner join msf as b

on datadate=date and prc>0.5 and prc=prccm /*Removing very long price matching*/

group by b.permno, a.gvkey, a.iid

having count(distinct datadate)> 7 ; /*Parameter 2*/

quit;

/*Refine matching with time dimension*/

proc sql;

create table meta as

select distinct a.permno, intnx('month',date,0,'e') as DATE format=date9.,

b.gvkey, b.iid, prc, prccm, prc>0.5 and prc=prccm as sdum /*Removing very long price matching*/

from msf as a inner join _pccm_ as b

on a.permno=b.permno

inner join Secm as c

on b.gvkey=c.gvkey and b.iid=c.iid and datadate=date

order by permno, gvkey, iid, date;

quit;

/*Counting backwards to fine-tuning the matched point*/

PROC EXPAND data=meta OUT=meta METHOD=NONE;

by permno gvkey iid;

CONVERT sdum = MS_sdum / TRANSFORM=(MOVSUM 15); /*Parameter 2*/

id date;

RUN;

/* Counting forwards for fine-tuning the matched point (Three steps:)*/

proc sort data=meta out=meta; by permno gvkey iid descending date; run;

data meta; set meta; by permno gvkey iid descending date;

idx+1;

if first.iid then idx=1;

run;

PROC EXPAND data=meta OUT=meta METHOD=NONE;

by permno gvkey iid;

CONVERT MS_sdum = MS_mdum / TRANSFORM=(MOVmax 15); /*Parameter 2*/

id idx;

RUN;

/*Keep consequent matched price and date points*/

proc sql undo_policy=none;

create table meta as

select distinct *, sum(sdum) as hcnt "historical matched count"

from meta(drop=idx)

where MS_mdum>=1 and MS_sdum>=1

group by permno, gvkey, iid

having max(ms_sdum)> 7 /*Parameter 2*/

order by permno, gvkey, iid, date;

quit;

/*Among the remained matched ones, count the closest spelling distance*/

proc sql;

create table mname_match as

select distinct a.gvkey, a.iid, a.permno, spedis(upcase(compress(COMNAM,,'sp')), upcase(compress(conm,,'sp'))) as spedis

from (select distinct gvkey, iid, permno from meta) as a

left join crsp.msenames as b

on a.permno=b.permno

left join comp.company as c

on a.gvkey=c.gvkey

group by a.permno, a.gvkey, a.iid

having spedis(COMNAM, conm)=max(spedis(COMNAM, conm));

quit;

/*Among the remained matched ones, check whether same tickers are evered shared*/

proc sql;

create table tick_match as

select distinct a.gvkey, a.iid, a.permno, scan(upcase(b.ticker),1) = scan(upcase(c.tic),1) as tmatched

from (select distinct gvkey, iid, permno from meta) as a

left join crsp.msenames(where=(lengthn(TICKER)>0)) as b

on a.permno=b.permno

left join comp.Security(where=(lengthn(tic)>0)) as c

on a.gvkey=c.gvkey and a.iid=c.iid

group by a.permno, a.gvkey, a.iid

having (scan(upcase(b.ticker),1) = scan(upcase(c.tic),1))

=max(scan(upcase(b.ticker),1) = scan(upcase(c.tic),1));

quit;

/*Matched name and ticker measures back to consequent matching results and

construct key monthly statistics*/

proc sql;

create table _pccm_ as

select distinct a.*, max(hcnt) as mhcnt, max(MS_sdum) as mms_sdum, max(ms_mdum) as mms_mdum,

count(distinct catt(a.gvkey, a.iid)) as gcnt, b.permno>. as cusip_matched, spedis,

max(b.permno>.) as mcusip_matched, tmatched, max(tmatched) as mtmatched

from meta as a left join cusip_match as b

on a.permno=b.permno and a.gvkey=b.gvkey and a.iid=b.iid

left join mname_match as c

on a.permno=c.permno and a.gvkey=c.gvkey and a.iid=c.iid

left join Tick_match as d

on a.permno=d.permno and a.gvkey=d.gvkey and a.iid=d.iid

group by a.permno, date

order by a.permno, date, a.gvkey, a.iid;

quit;

/*Cleansing procedure:

Among multiple matches, keep the ones with either cusip matched or ticker matched

For the remainig ones, some random statistics are used to remove wrong matched results*/

data _pccm_; set _pccm_;

if gcnt>1 and mcusip_matched=1 and cusip_matched^=1 then delete; /*Keep CUSIP Matched Only*/

else if gcnt>1 and mtmatched=1 and tmatched^=1 then delete; /*Keep CUSIP Matched Only*/

if mcusip_matched=0 and mtmatched=0 and spedis>10 and MS_mdum<=3 and hcnt<5 then delete;/*Keep the ones with similar names but matched only a few times - Value decided by observations*/

else if mcusip_matched=0 and mtmatched=0 and spedis>30 and hcnt<15 then delete;/*Keep the ones with different names but with a lot matches - Value decided by observations*/

drop mhcnt mms_sdum mms_mdum gcnt mcusip_matched mtmatched;

run;

/*To have the best one-to-one (permno to gvkey iid) match*/

proc sql;

create table pccm as

select distinct *

from (

select distinct *

from (

select distinct *

from (

select distinct *

from _pccm_

group by permno, date

having hcnt=max(hcnt) /*1) among the current best, get the best historical best*/

)

group by permno, date

having MS_sdum=max(MS_sdum) /*2) at given day, get the best matches*/

)

group by gvkey, iid, date

having hcnt=max(hcnt)

)

group by gvkey, iid, date

having MS_sdum=max(MS_sdum) ;

quit;

/*Patching based on observations */

proc sql undo_policy=none;

create table pccm as

select distinct *

from (

select distinct *

from pccm

group by permno, gvkey, date

having iid=min(iid)

)

group by permno, date

having spedis=min(spedis)

order by permno, date;

quit;

/*Add Time dimensions to PERMNO_to_IID through CUSIP*/

proc sql;

create table cusip_match_valid as

select distinct a.permno, a.date, b.gvkey, b.iid

from msf as a inner join Cusip_match as b

on a.permno=b.permno

inner join (

select distinct gvkey, datadate from comp.funda

where sale>. or at>.

) as c

on b.gvkey=c.gvkey and

intnx('month', datadate, -24, 'e')<a.date<=intnx('month', datadate, 12, 'e')

left join pccm as d

on a.permno=d.permno and b.gvkey=d.gvkey and b.iid=d.iid and a.date=d.date

group by a.permno, a.date

having (d.permno>.)=max(d.permno>.) and count(distinct catt(gvkey,iid))=1;

quit;

/*Combine CUSIP and PRICE match*/

proc sql;

create table _link_table_ as

select distinct a.permno, a.date, b.gvkey, b.iid,

c.gvkey as cusip_gvkey, c.iid as cusip_iid

from msf as a left join pccm as b

on a.permno=b.permno and a.date=b.date

left join cusip_match_valid as c

on a.permno=c.permno and a.date=c.date

order by permno, gvkey, iid, date;

quit;

/*Making a range (two steps)*/

data _link_table_(where=(lengthn(gvkey)>0)); set _link_table_;

if gvkey="" and iid="" then do;

gvkey=cusip_gvkey; iid=cusip_iid;

end;

drop cusip_gvkey cusip_iid;

proc sort; by permno gvkey iid date;

run;

data _link_table_; set _link_table_; by permno gvkey iid date;

retain from;

ldate=lag(date);

if first.iid then do; ldate=.; from=date; end;

else if intnx('month',ldate, 1, 'e')<intnx('month',date, 0, 'e') then

from=date;

format from date9.;

run;

/*Save the matched Table*/

proc sql;

create table pccm_table as

select distinct permno, gvkey, iid, from, date as to format=date9.

from _link_table_

group by permno, gvkey, iid, from

having date=max(date);

quit;

Rui Dai - CRSP-Compustat (2024)

References

Top Articles
I've tried many smart locks, but this is the one I keep on my door
Mark Zuckerberg says we’ll all be wearing AI smart glasses – he’s not wrong, but some big changes need to happen first
1970 Chevrolet Chevelle SS - Skyway Classics
Hk Jockey Club Result
Polyhaven Hdri
Gunshots, panic and then fury - BBC correspondent's account of Trump shooting
Nm Remote Access
Lycoming County Docket Sheets
Braums Pay Per Hour
The Binding of Isaac
Chile Crunch Original
Games Like Mythic Manor
New Stores Coming To Canton Ohio 2022
Michael Shaara Books In Order - Books In Order
Powerball winning numbers for Saturday, Sept. 14. Check tickets for $152 million drawing
Honda cb750 cbx z1 Kawasaki kz900 h2 kz 900 Harley Davidson BMW Indian - wanted - by dealer - sale - craigslist
Recap: Noah Syndergaard earns his first L.A. win as Dodgers sweep Cardinals
Eine Band wie ein Baum
Craigslist Prescott Az Free Stuff
Conan Exiles Sorcery Guide – How To Learn, Cast & Unlock Spells
Gilchrist Verband - Lumedis - Ihre Schulterspezialisten
Giantbodybuilder.com
Craigslist Fort Smith Ar Personals
3 Ways to Drive Employee Engagement with Recognition Programs | UKG
Section 408 Allegiant Stadium
Current Students - Pace University Online
How to Use Craigslist (with Pictures) - wikiHow
Vip Lounge Odu
Shauna's Art Studio Laurel Mississippi
Duke Energy Anderson Operations Center
Gus Floribama Shore Drugs
Pnc Bank Routing Number Cincinnati
Joplin Pets Craigslist
About Us | SEIL
Best Weapons For Psyker Darktide
9781644854013
Hindilinks4U Bollywood Action Movies
Zasilacz Dell G3 15 3579
Bernie Platt, former Cherry Hill mayor and funeral home magnate, has died at 90
Metro Pcs Forest City Iowa
Scarlet Maiden F95Zone
Casamba Mobile Login
Ucsc Sip 2023 College Confidential
Actor and beloved baritone James Earl Jones dies at 93
Best Haircut Shop Near Me
Plasma Donation Greensburg Pa
25100 N 104Th Way
Grace Family Church Land O Lakes
St Als Elm Clinic
Roller Znen ZN50QT-E
How Did Natalie Earnheart Lose Weight
Comenity/Banter
Latest Posts
Article information

Author: Ray Christiansen

Last Updated:

Views: 6042

Rating: 4.9 / 5 (49 voted)

Reviews: 80% of readers found this page helpful

Author information

Name: Ray Christiansen

Birthday: 1998-05-04

Address: Apt. 814 34339 Sauer Islands, Hirtheville, GA 02446-8771

Phone: +337636892828

Job: Lead Hospitality Designer

Hobby: Urban exploration, Tai chi, Lockpicking, Fashion, Gunsmithing, Pottery, Geocaching

Introduction: My name is Ray Christiansen, I am a fair, good, cute, gentle, vast, glamorous, excited person who loves writing and wants to share my knowledge and understanding with you.