COM**************************************************************************** COM COM This code was written in the March of 2006 by CAATS (AKA David Coderre) and originally posted to the ACL USER FORUM. COM It was later uploaded onto the TexasACL.COM. COM COM Fuzzy Logic used to search for duplicates. The code is designed to be usable on any table. COM**************************************************************************** set safety off DIALOG (DIALOG TITLE "User Dialog" WIDTH 481 HEIGHT 372 ) (BUTTONSET TITLE "&OK;&Cancel" AT 370 12 DEFAULT 1 ) (TEXT TITLE "Fuzzy logic test for duplicates" AT 48 40 WIDTH 164 HEIGHT 32 ) (TEXT TITLE "Opens file, Removes special character for key field, Sorts file, Performs letter by letter comparision (both directions) of consecutive records." AT 84 88 WIDTH 312 HEIGHT 46 ) (TEXT TITLE "E.G. Vendors 'ABC' and 'ABC Ltd' differ by 3 letters 'Ltd', and Vendors 'Marc Motors' and 'Mark Motors' differ by one letter 'c' vs. 'k'" AT 72 232 WIDTH 343 HEIGHT 36 ) (TEXT TITLE "Identifies records if the number of different letters in the same position is <= threshold value." AT 84 160 WIDTH 312 HEIGHT 29 ) (TEXT TITLE "cc CAATS" AT 324 328 ) DIALOG (DIALOG TITLE "User Dialog" WIDTH 484 HEIGHT 347 ) (BUTTONSET TITLE "&OK;&Cancel" AT 360 24 DEFAULT 1 ) (TEXT TITLE "Fuzy logic test for duplicates" AT 24 40 WIDTH 147 HEIGHT 24 ) (TEXT TITLE "Select table to open" AT 24 76 ) (ITEM TITLE "f" TO "v_infile" AT 120 120 WIDTH 205 HEIGHT 130 ) (TEXT TITLE "cc CAATS" AT 360 304 ) open %v_infile% DELETE Dup_test1 OK DELETE Dup_test2 OK DELETE RDup_test1 OK DELETE RDup_test2 OK DELETE recnum OK DELETE %v_dup_Field%v2 OK DELETE %v_list%v2 OK DELETE Fuzzy_Dup.fil OK DELETE format Fuzzy_Dup OK DIALOG (DIALOG TITLE "User Dialog" WIDTH 492 HEIGHT 395 ) (BUTTONSET TITLE "&OK;&Cancel" AT 370 12 DEFAULT 1 ) (TEXT TITLE "Select field to test for duplicates" AT 24 184 WIDTH 163 ) (ITEM TITLE "C" TO "v_dup_field" AT 24 228 WIDTH 202 HEIGHT 130 ) (TEXT TITLE "Set threshold (considered duplicate if number of characters difference is less than or equal to)" AT 24 52 WIDTH 184 HEIGHT 51 ) (EDIT TO "v_threshold" AT 240 72 WIDTH 20 HEIGHT 24 DEFAULT "4" ) (TEXT TITLE "Select field - to list if duplicate record" AT 252 184 WIDTH 180 HEIGHT 31 ) (ITEM TITLE "CND" TO "v_list" AT 240 228 WIDTH 223 HEIGHT 133 ) DEFINE field recnum Computed recno() DEFINE field %v_dup_Field%v2 COMPUTED recoffset(%v_Dup_field%,1) DEFINE field %v_list%v2 COMPUTED recoffset(%v_list%,1) DEFINE field Dup_test1 Computed INCLUDE(UPPER(%v_dup_field%),'QWERTYUIOPLKJHGFDSAZXCVBNM1234567890') DEFINE field Dup_test2 COMPUTED recoffset(Dup_test1,1) DEFINE field RDup_test1 COMPUTED Alltrim(REVERSE(Dup_test1)) DEFINE field RDup_test2 COMPUTED alltrim(REVERSE(Dup_test2)) SORT on Dup_test1 to temp OPEN loop_max=LEN(%v_dup_field%) loop_ctr=1 Ldup_key=0 Rdup_key=0 GROUP LOOP while loop_ctr<=loop_max LDup_key=Ldup_key+1 if substr(dup_test1,loop_ctr,1)<>substr(dup_test2,loop_ctr,1) loop_ctr=loop_ctr+1 END extract recnum %v_dup_Field% %v_list% %v_dup_Field%v2 %v_list%v2 LDup_key RDup_key 1 as 'L_cnt' 0 as 'R_cnt' to fuzzy_temp append if 1=1 Ldup_key=0 Rdup_key=0 loop_ctr=1 END OPEN %v_infile% SORT on RDup_test1 to temp OPEN loop_ctr=1 Ldup_key=0 Rdup_key=0 GROUP LOOP while loop_ctr<=loop_max RDup_key=Rdup_key+1 if substr(Rdup_test1,loop_ctr,1)<>substr(Rdup_test2,loop_ctr,1) loop_ctr=loop_ctr+1 END extract recnum %v_dup_Field% %v_list% %v_dup_Field%v2 %v_list%v2 Ldup_key RDup_key 0 as 'L_cnt' 1 as 'R_cnt' to fuzzy_temp append if 1=1 LDup_key=0 Rdup_key=0 loop_ctr=1 END OPEN Fuzzy_temp Summarize on %v_dup_Field% %v_dup_Field%v2 OTHER %v_list% %v_list%v2 recnum Acc Ldup_key RDup_key L_cnt R_cnt to Fuzzy_temp2 PRESORT OPEN DEFINE field Dup_Key Computed MIN(LDup_Key,RDup_key) if l_cnt>0 and r_cnt>0 Ldup_key if L_cnt>0 RDup_key EXTRACT %v_dup_Field% %v_list% %v_dup_Field%v2 %v_list%v2 recnum Dup_Key if Dup_Key<=%v_threshold% to Fuzzy_Dup OPEN %v_infile% DELETE Fuzzy_TEMP.fil OK DELETE format Fuzzy_TEMP OK DELETE Fuzzy_TEMP2.fil OK DELETE format Fuzzy_TEMP2 OK DELETE TEMP.fil OK DELETE format TEMP OK DELETE Dup_test1 OK DELETE Dup_test2 OK DELETE RDup_test1 OK DELETE RDup_test2 OK DELETE recnum OK DELETE %v_dup_Field%v2 OK DELETE %v_list%v2 OK DELETE loop_Ctr OK DELETE loop_max OK set safety on