*-- Import "Adressen.txt", left-trim the address fields, copy the
*-- de-duplicated records into Table2 and write them as comma separated
*-- lines to "Adressen2.txt".
*--
*-- Names used here but defined outside this section:
*--   _FOLDER_   compile-time constant prepended to every file name
*--   _TIMING_   compile-time switch for the "? SECONDS() - m.lnSec0" output
*--   m.lnSec0   reference time for the timing output
*--              (presumably SECONDS() at program start - TODO confirm)
*--   Table1     work area that is scanned below (presumably the table the
*--              import is appended to - TODO confirm)
#IF _TIMING_
    ACTIVATE SCREEN
#ENDIF

APPEND FROM _FOLDER_ + "Adressen.txt" DELIMITED
#IF _TIMING_
    ? SECONDS() - m.lnSec0, "Append from"
#ENDIF

*-- .t. = detect duplicates through a hash column (ChkSum) and an index on it
*-- .f. = let a UNIQUE index tag filter the duplicates
#define CL_USEHASH .t.

*-- Vorname is deliberately not trimmed in either variant (original note:
*-- "not needed"). The note lives here as a standalone comment because an
*-- inline && comment may not follow a continuation semicolon.
#if CL_USEHASH
    *-- Alternative library tried by the author: SET LIBRARY TO md5.fll
    SET LIBRARY TO vfpencryption71.fll
    *-- Alternatives tried by the author for the ChkSum expression:
    *--   "Ed Laefe version": md5Hash(Vorname + name + Strasse + HausNr + PLZ + Ort + email)
    *--   INT(VAL(SYS(2017,"ChkSum",0,1))-m.lnDiff)   - checksum too slow
    REPLACE ALL ;
        Name WITH LTRIM( Name ), ;
        Strasse WITH LTRIM( Strasse ), ;
        HausNr WITH LTRIM( HausNr ), ;
        PLZ WITH LTRIM( PLZ ), ;
        Ort WITH LTRIM( Ort ), ;
        eMail WITH LTRIM( eMail ), ;
        ChkSum WITH Hash(Vorname + name + Strasse + HausNr + PLZ + Ort + email, 5)
#else
    REPLACE ALL ;
        Name WITH LTRIM( Name ), ;
        Strasse WITH LTRIM( Strasse ), ;
        HausNr WITH LTRIM( HausNr ), ;
        PLZ WITH LTRIM( PLZ ), ;
        Ort WITH LTRIM( Ort ), ;
        eMail WITH LTRIM( eMail )
#endif
#IF _TIMING_
    ? SECONDS() - m.lnSec0, "Repl all"
#ENDIF

#if CL_USEHASH
    SET UNIQUE OFF
    INDEX ON ChkSum TO _unique COMPACT
    LOCAL lcLastHash, laRecs[1], llWasDup, lcKey
    lcLastHash = "1"    && since length is different, it is initialized ok
    *-- This timing line is now guarded like every other one, so nothing is
    *-- printed (and m.lnSec0 is not touched) when _TIMING_ is off.
    #IF _TIMING_
        ? SECONDS() - m.lnSec0, "Idx ChkSum"
    #ENDIF
#elif .f.
    *-- Disabled alternative.
    *-- not totally clear, seems to be faster than unique in index
    SET UNIQUE ON
    INDEX ON HausNr + PLZ + Ort + Vorname + Name + Strasse + eMail TO _unique COMPACT
    #IF _TIMING_
        ? SECONDS() - m.lnSec0, "Rearranged Idx Built"
    #ENDIF
#else
    INDEX ON Vorname + Name + Strasse + HausNr + PLZ + Ort + eMail TAG _unique UNIQUE
    #IF _TIMING_
        ? SECONDS() - m.lnSec0, "CDX Built"
    #ENDIF
#endif

CREATE TABLE _FOLDER_ + "Table2" ;
    (name C(61), Strasse C(34), Ort C(36), eMail C(30) )
*-- NOTE(review): the result of FLOCK() is ignored, as in the original.
FLOCK("Table2")

SELECT Table1
SCAN
    #if CL_USEHASH
        *-- First try using a hash index to filter out duplicate records.
        *-- Assumption (original author): only a minuscule part is really
        *-- duplicate and the hash function is nearly collision free
        *-- (2**32 bins for 5*10**5 records should work even for a mediocre
        *-- hash function key distribution).
        *-- Therefore the "last stored" key is not updated on every record;
        *-- normally only a boolean is checked. The first time a hash value
        *-- repeats, the key of the previous record is fetched with
        *-- SKIP -1 / SKIP and kept in laRecs for comparison.
        IF NOT m.lcLastHash == ChkSum
            *-- New hash value: the record cannot be a duplicate.
            INSERT INTO Table2 VALUES ;
                ( RTRIM( Table1.Vorname ) + " " + Table1.name ;
                , RTRIM( Table1.Strasse ) + " " + Table1.HausNr ;
                , Table1.PLZ + Table1.Ort, Table1.eMail )
            lcLastHash = ChkSum
            IF m.llWasDup
                *-- Leaving a run of equal hashes: forget the collected keys.
                llWasDup = .F.
                DIMENSION laRecs[1]
            ENDIF
        ELSE
            *-- Same hash as the previous record: compare the full keys.
            *-- The key of the current record is taken before any record
            *-- movement or insert.
            lcKey = Vorname + name + Strasse + HausNr + PLZ + Ort + eMail
            IF NOT m.llWasDup
                *-- First repetition of this hash value: remember the key of
                *-- the previous record, which has already been inserted.
                llWasDup = .T.
                SKIP -1
                laRecs[1] = Vorname + name + Strasse + HausNr + PLZ + Ort + eMail
                SKIP
                IF NOT laRecs[1] == m.lcKey
                    *-- Hash collision between different records: keep it.
                    INSERT INTO Table2 VALUES ;
                        ( RTRIM( Table1.Vorname ) + " " + Table1.name ;
                        , RTRIM( Table1.Strasse ) + " " + Table1.HausNr ;
                        , Table1.PLZ + Table1.Ort, Table1.eMail )
                    DIMENSION laRecs[2]
                    laRecs[2] = m.lcKey
                    *-- (the other case is a true duplicate and is skipped)
                ENDIF
            ELSE
                *-- Further repetition: keep the record only if its key has
                *-- not been seen within this run of equal hashes.
                IF ASCAN(laRecs, m.lcKey) = 0
                    INSERT INTO Table2 VALUES ;
                        ( RTRIM( Table1.Vorname ) + " " + Table1.name ;
                        , RTRIM( Table1.Strasse ) + " " + Table1.HausNr ;
                        , Table1.PLZ + Table1.Ort, Table1.eMail )
                    DIMENSION laRecs[ALEN(laRecs)+1]
                    laRecs[ALEN(laRecs)] = m.lcKey
                ENDIF
            ENDIF
        ENDIF
    #else
        *-- The UNIQUE index already hides duplicate keys: copy every record seen.
        INSERT INTO Table2 VALUES ;
            ( RTRIM( Table1.Vorname ) + " " + Table1.name ;
            , RTRIM( Table1.Strasse ) + " " + Table1.HausNr ;
            , Table1.PLZ + Table1.Ort, Table1.eMail )
    #endif
ENDSCAN
#IF _TIMING_
    ? SECONDS() - m.lnSec0, "Scan/Insert", RECCOUNT("Table2")
#ENDIF

*-- Disabled by the author: DELETE TAG _Unique
USE IN Table1
#IF _TIMING_
    ? SECONDS() - m.lnSec0, "TableClose"
#ENDIF

ERASE _FOLDER_ + "Adressen2.*"
lnFHandle = FCREATE( _FOLDER_ + "Adressen2.txt", 0 )
IF m.lnFHandle < 0
    *-- FCREATE() returns -1 when the file cannot be created; stop here
    *-- instead of failing on every FPUTS() below.
    ERROR "Could not create output file Adressen2.txt"
ENDIF

SELECT Table2
SCAN
    *-- One output line per record. The first blank of Name and Ort and the
    *-- last blank of the trimmed Strasse are replaced by ", ", and the four
    *-- columns are joined with ", ".
    #if .t.
        *-- no variables are fastest here - CPU faster than RAM even in P-Code!
        FPUTS( m.lnFHandle, ;
            STUFF( RTRIM( Name ), AT( " ", Name ), 1, ", " ) + ", " + ;
            STUFF( RTRIM( Strasse ), RAT( " ", RTRIM( Strasse ) ), 1, ", " ) + ", " + ;
            STUFF( RTRIM( Ort ), AT( " ", Ort ), 1, ", " ) + ", " + ;
            RTRIM( eMail ) )
    #elif .t.
        *-- Disabled alternative (never compiled while the branch above is .t.).
        lcStrasse = RTRIM( Strasse )
        FPUTS( m.lnFHandle, ;
            STUFF( RTRIM( Name ), AT( " ", Name ), 1, ", " ) + ", " + ;
            STUFF( m.lcStrasse, RAT( " ", m.lcStrasse ), 1, ", " ) + ", " + ;
            STUFF( RTRIM( Ort ), AT( " ", Ort ), 1, ", " ) + ", " + ;
            RTRIM( eMail ) )
    #else
        *-- Disabled alternative. NOTE(review): this variant splits Name at
        *-- its LAST blank (RAT), unlike the two variants above.
        lcName = RTRIM( Name )
        lcStrasse = RTRIM( Strasse )
        FPUTS( m.lnFHandle, ;
            STUFF( m.lcName, RAT( " ", m.lcName ), 1, ", " ) + ", " + ;
            STUFF( m.lcStrasse, RAT( " ", m.lcStrasse ), 1, ", " ) + ", " + ;
            STUFF( RTRIM( Ort ), AT( " ", Ort ), 1, ", " ) + ", " + ;
            RTRIM( eMail ) )
    #endif
ENDSCAN
FCLOSE( m.lnFHandle )
*-- NOTE(review): the original text continued directly after FCLOSE() with
*-- the truncated sentence "and don't forget to add a" - it is not code and
*-- its ending is not visible here.