* Summarize on the lowest GROUP BY level, and also include 3 dummy integer columns >* (one for each higher GROUP BY level), to be filled in later via UPDATE: > >SELECT ; > A.cFromIP, ; > A.cPortProt, ; > A.cToPDom, ; > A.cToFDom, ; > CAST( 0 AS I ) AS FromIPReq, ; > CAST( 0 AS I ) AS PortProtReq, ; > CAST( 0 AS I ) AS ToPDomReq, ; > CAST( COUNT(*) AS I ) AS ToFDomReq ; > FROM Logs1 A ; > INTO CURSOR GpTest ; > GROUP BY A.cFromIP, A.cPortProt, A.cToPDom, A.cToFDom ; > ORDER BY A.cFromIP, A.cPortProt, A.cToPDom, A.cToFDom ; > READWRITE > >* The Request Count columns are the ones that are CAST() to integer >* You can CAST() a COUNT(*) to integer (4 bytes) instead of the default N(10, 0) >* READWRITE is required as we'll be updating this cursor below. > >* Now do the UPDATEs: > >* FromIPReq column holds # of requests per distinct >* FromIP Address: >UPDATE GpTest ; > SET FromIPReq = ; > ( SELECT SUM( B.ToFDomReq ) FROM GpTest B ; > WHERE B.cFromIP = GpTest.cFromIP ) > >* PortProtReq column holds # of requests per distinct >* FromIP Address / PortProt combination: >UPDATE GpTest ; > SET PortProtReq = ; > ( SELECT SUM( B.ToFDomReq ) FROM GpTest B ; > WHERE B.cFromIP = GpTest.cFromIP ; > AND B.cPortProt = GpTest.cPortProt ) > >* ToPDomReq column holds # of requests per distinct >* FromIP Address / PortProt / ToPDom combination: >UPDATE GpTest ; > SET ToPDomReq = ; > ( SELECT SUM( B.ToFDomReq ) FROM GpTest B ; > WHERE B.cFromIP = GpTest.cFromIP ; > AND B.cPortProt = GpTest.cPortProt ; > AND B.cToPDom = GpTest.cToPDom )Since VFP9 supports subqueries in the SELECT clause, we can replace the "0" in each CAST() in the original SELECT - SQL with the corresponding UPDATE query (adjusting for the fact we're working from the original Logs1 cursor, and we're running COUNT(*) instead of a SUM() on the pre-summarized lowest-level GROUP BY values):
SELECT ; > A.cFromIP, ; > A.cPortProt, ; > A.cToPDom, ; > A.cToFDom, ; > CAST( ( SELECT COUNT( * ) FROM Logs1 B ; > WHERE B.cFromIP = A.cFromIP ) AS I ) AS FromIPReq, ; > CAST( ( SELECT COUNT( * ) FROM Logs1 B ; > WHERE B.cFromIP = A.cFromIP ; > AND B.cPortProt = A.cPortProt ) AS I ) AS PortProtReq, ; > CAST( ( SELECT COUNT( * ) FROM Logs1 B ; > WHERE B.cFromIP = A.cFromIP ; > AND B.cPortProt = A.cPortProt ; > AND B.cToPDom = A.cToPDom ) AS I ) AS ToPDomReq, ; > CAST( COUNT( * ) AS I ) AS ToFDomReq ; > FROM Logs1 A ; > INTO CURSOR GpTest ; > GROUP BY A.cFromIP, A.cPortProt, A.cToPDom, A.cToFDom ; > ORDER BY A.cFromIP, A.cPortProt, A.cToPDom, A.cToFDomAlthough this single SELECT approach works I'll probably use the multiple-update approach instead. In informal tests on a log file with about 54,000 rows, the single monolithic SELECT takes about 9.5 seconds to run, returning about 2,000 rows. The SELECT + UPDATEs approach takes ~3 seconds. This is with index tags on all 4 GROUP BY columns in the source Logs1 cursor. I think the reason for the time difference is that with the monolithic approach, each subquery in the SELECT clause has to run for 54,000 candidate rows. OTOH, with the SELECT + UPDATEs approach, the UPDATEs are running only on the ~2,000 resulting rows.
SET NULL ON && best with NULL, but SQL design bug .... CREATE CURSOR Logs1 ( cFromIP C(15),cPortProt I,cToPDom V(40),cToFDom V(40)) FOR k=1 TO 12 INSERT INTO Logs1 VALUES ("192.168.0.2",80,"microsoft.com","msdn.microsoft.com") NEXT FOR k=1 TO 27 INSERT INTO Logs1 VALUES ("192.168.0.2",80,"microsoft.com","office.microsoft.com") NEXT FOR k=1 TO 5 INSERT INTO Logs1 VALUES ("192.168.0.2",80,"yahoo.com","movies.yahoo.com") NEXT FOR k=1 TO 16 INSERT INTO Logs1 VALUES ("192.168.0.2",80,"yahoo.com","rd1.yahoo.com") NEXT FOR k=1 TO 122 INSERT INTO Logs1 VALUES ("192.168.0.2",110,"microsoft.com","msdn.microsoft.com") NEXT FOR k=1 TO 273 INSERT INTO Logs1 VALUES ("192.168.0.2",110,"microsoft.com","office.microsoft.com") NEXT FOR k=1 TO 55 INSERT INTO Logs1 VALUES ("192.168.0.2",110,"yahoo.com","movies.yahoo.com") NEXT FOR k=1 TO 126 INSERT INTO Logs1 VALUES ("192.168.0.2",110,"yahoo.com","rd1.yahoo.com") NEXT SELECT cFromIP ; , cPortProt ; , cToPDom ; , cToFDom ; , COUNT(*) counts ; INTO CURSOR GpTest1 ; FROM Logs1; GROUP BY 1,2,3,4 SELECT *; FROM GpTest1 ; UNION ALL; SELECT cFromIP ; , cPortProt ; , cToPDom ; , ""; , SUM(counts ); FROM GpTest1 ; GROUP BY 1,2,3 ; UNION ALL; SELECT cFromIP ; , cPortProt ; , ""; , ""; , SUM(counts ); FROM GpTest1 ; GROUP BY 1,2 ; UNION ALL; SELECT cFromIP ; , 0 ; , "" ; , ""; , SUM(counts ); FROM GpTest1 ; GROUP BY 1 ; INTO CURSOR GpTest ; ORDER BY 1,2,3,4 USE IN GpTest1 BROWSE