Attached is a script Ian Craigen provided to the ACL User forum, this script is intended to cleanse common terms and normalize them into common terms. It should be noted that this script was written for somebody in New Zealand, thus may need to be adapted for your own use. It should also be noted that some people on the forum at some issues with some of the code, for the full discussion please check out the ACL Forum at
http://www.acl.com/supportcenter/forums/threadview.aspx?id=9934. Note you do need to have a supported license to join the ACL Forum. If you have to pay for support.
SET FOLDER /_CLEAN_VENDOR_NAMES
SET LOG _CLEAN_VENDOR_NAMES
SET LOOP TO 0
COMMENT
* This script cleans vendor names looking for common words and changing these to abbreviations
* Abbreviations used can be referenced to New Zealand Post Address Standards
END
DELETE FORMAT DEBUG OK
DELETE DEBUG.FIL OK
DELETE FORMAT Common_Words OK
DELETE Common_Words.FIL OK
DELETE FORMAT Common_Words_Duplicates OK
DELETE Common_Words_Duplicates.FIL OK
DELETE FORMAT Common_Words_Classify OK
DELETE Common_Words_Classify.FIL OK
ACCEPT "Table with Vendor names to clean" FIELDS "xf" to x_table_name
OPEN %x_table_name%
DIALOG (DIALOG TITLE "User Dialog" WIDTH 492 HEIGHT 395 ) (BUTTONSET TITLE "&OK;&Cancel" AT 370 12 DEFAULT 1 ) (TEXT TITLE "Select field that has Vendor names" AT 12 28 WIDTH 265 ) (ITEM TITLE "C" TO "v_name_field" AT 12 60 WIDTH 266 HEIGHT 130 ) (TEXT TITLE "Select field - to list if duplicate record" AT 12 208 WIDTH 265 HEIGHT 31 ) (ITEM TITLE "CND" TO "v_list" AT 12 252 WIDTH 261 HEIGHT 133 )
DELETE FORMAT Cleaned_%v_name_field% OK
DELETE Cleaned_%v_name_field%.FIL OK
DELETE FORMAT SUMM_on_%v_name_field% OK
DELETE SUMM_On_%v_name_field%.FIL OK
COUNT
SUMMARIZE ON %v_name_field% %v_list% to SUMM_on_%v_name_field% OPEN
Word = BLANKS(50)
v_len1 = LEN(%v_name_field%)
v_name = BLANKS(%v_len1%)
Clean_%v_name_field%= BLANKS(%v_len1%)
GROUP
Clean_%v_name_field%= BLANKS(%v_len1%)
ASSIGN v_name = UPPER(%v_name_field%)
ASSIGN v_name = REPLACE(v_name,'#',' ')
ASSIGN v_name = REPLACE(v_name,'*',' ')
ASSIGN v_name = REPLACE(v_name,'.',' ')
ASSIGN v_name = REPLACE(v_name,'(',' ')
ASSIGN v_name = REPLACE(v_name,')',' ')
ASSIGN v_name = REPLACE(v_name,' AND ',' & ')
ASSIGN v_name = REPLACE(v_name,'ADMINISTRATION','ADMN.')
ASSIGN v_name = REPLACE(v_name,'AGENCY','AGCY.')
ASSIGN v_name = REPLACE(v_name,'CENTRE','CTR.')
ASSIGN v_name = REPLACE(v_name,'CENTER','CTR.')
ASSIGN v_name = REPLACE(v_name,'COMPANY','CO.')
ASSIGN v_name = REPLACE(v_name,'CORPORATION','CORP.')
ASSIGN v_name = REPLACE(v_name,'DEPARTMENT','DEPT.')
ASSIGN v_name = REPLACE(v_name,'DIVISION','DIV.')
ASSIGN v_name = REPLACE(v_name,'ENTERPRISE','ENTPRS.')
ASSIGN v_name = REPLACE(v_name,'GOVERNMENT','GOVT.')
ASSIGN v_name = REPLACE(v_name,'GROUP','GP.')
ASSIGN v_name = REPLACE(v_name,'HEADQUARTERS','HQ.')
ASSIGN v_name = REPLACE(v_name,'CORPORATION','CORP.')
ASSIGN v_name = REPLACE(v_name,'INCORPORATED','INC.')
ASSIGN v_name = REPLACE(v_name,'LABORATORY','LAB.')
ASSIGN v_name = REPLACE(v_name,'LABORATORIES','LABS.')
ASSIGN v_name = REPLACE(v_name,'LIMITED','LTD.')
ASSIGN v_name = REPLACE(v_name,'MANAGEMENT','MGMT.')
ASSIGN v_name = REPLACE(v_name,'MANUFACTURER','MFR.')
ASSIGN v_name = REPLACE(v_name,'MANUFACTURING','MFG.')
ASSIGN v_name = REPLACE(v_name,'NATIONAL','NATL.')
ASSIGN v_name = REPLACE(v_name,'OFFICE','OFC.')
ASSIGN v_name = REPLACE(v_name,'PARTNERSHIP','PRTNRSHP.')
ASSIGN v_name = REPLACE(v_name,'PROPRIETARY','PTY.')
ASSIGN v_name = REPLACE(v_name,'SYSTEM ','SYS. ')
ASSIGN v_name = REPLACE(v_name,'SYSTEMS','SYS.')
ASSIGN v_name = REPLACE(v_name,'ASSOCIATION','ASSN.')
ASSIGN v_name = REPLACE(v_name,'ASSOC ','ASSN.')
ASSIGN v_name = REPLACE(v_name,'ASSOC.','ASSN.')
ASSIGN v_name = REPLACE(v_name,' THE ',' ')
ASSIGN v_name = REPLACE(v_name,' OF ' ,' ')
ASSIGN v_name = REPLACE(v_name,' FOR ',' ')
ASSIGN v_name = REPLACE(v_name,' IN ' ,' ')
ASSIGN v_name = REPLACE(v_name,' ON ' ,' ')
ASSIGN v_name = REPLACE(v_name,' AS ' ,' ')
ASSIGN v_name = REPLACE(v_name,' DR ' ,' ')
ASSIGN v_name = REPLACE(v_name,' MR ' ,' ')
ASSIGN v_name = REPLACE(v_name,' MRS ' ,' ')
ASSIGN v_name = REPLACE(v_name,' MS ' ,' ')
ASSIGN v_name = REPLACE(v_name,' MISS ' ,' ')
ASSIGN v_name = REPLACE(v_name,'NEW ZEALAND','NZ')
ASSIGN v_name = SUB(INCLUDE(v_name,'QWERTYUIOPLKJHGFDSAZXCVBNM &1234567890')+BLANKS(v_len1),1,v_len1)
v_len = LEN(ALLTRIM(v_name))
Loop_Ctr_max = Occurs(ALLTRIM(v_name)," ")+1
Loop_Ctr = 1
LOOP while Loop_Ctr <= Loop_Ctr_max
Word = SUB(SPLIT(ALLTRIM(v_name)," ",Loop_Ctr)+BLANKS(50),1,50)
EXTRACT Recno() AS "Rec_Num" Loop_Ctr AS "Word_Num" %v_name_field% Word TO Common_Words IF NOT(ISBLANK(Word))
Clean_%v_name_field% = ALLTRIM(ALLTRIM(Clean_%v_name_field%) + ' ' + ALLTRIM(Word)) IF NOT ( ISBLANK(Word) OR (Loop_ctr = 1 AND MATCH(Word,"THE","MR","DR","MRS","MS","MISS")))
Loop_Ctr = Loop_Ctr + 1
END
EXTRACT %v_list% Clean_%v_name_field% AS "%v_name_field%" TO Cleaned_%v_name_field%
END
OPEN Common_Words
DUPLICATES ON Word PRESORT OPEN TO "Common_Words_Duplicates.FIL" OPEN
CLASSIFY ON Word TO "Common_Words_Classify.FIL" OPEN
PROFILE ALL
STRATIFY ON COUNT MINIMUM 5 MAXIMUM 10 INTERVALS 10 TO SCREEN
OPEN Cleaned_%v_name_field%
SET LOOP TO 20
SET LOG