Level Extreme platform
Subscription
Corporate profile
Products & Services
Support
Legal
Français
Crc32
Message
From
25/08/2011 05:28:20
 
 
To
25/08/2011 04:55:11
General information
Forum:
ASP.NET
Category:
Other
Title:
Re: Crc32
Environment versions
Environment:
C# 4.0
Miscellaneous
Thread ID:
01521077
Message ID:
01521740
Views:
37
Boy you've really dived into this one :-} Want to try it when I get the time.
At first I assumed you had hard-wired the buffer count to your machine specs but I see you're doing it dynamically based on the environment....


>Here's a modified version which uses parallel calculation of the CRC
>
>Tests done on a 544 Mb file. Once the data is warm (been read and in memory) it takes 0.17 sec to read the file
>
>(1) Supports IEEE (same as vfp) and Castagnoli crc. Enum used to create the object, default is IEEE
>
>(2) Use sliced by 16 instead of slicing by 8 (takes off 6-7 % of the time)
>
>(3) Table construction
> - tables are constructed when they are first used
> - are static
> - construction is partially parallel (17 ms instead of 50 ms)
>
>(4) calculation of the crc
>
>There's a loop that reads a huge buffer at a time
>
>The buffer is logically divided into a number of blocks. The best performance I got was to take the number of logical cores multiplied by 8
>
>Then, the CRC of each of the logical blocks is calculated in parallel
>
>To calculate the CRC of the buffer, the CRCs of each of the logical blocks are merged ( took me a while to figure that one out)
>
>(5) Timing ( I have a quad core, logical cores = 8)
>
>Reading all the warm data takes 0.17 sec
>Calculation of the tables in parallel is about 0.17 sec which I do not consider since I run the test twice and use only the second result
>
>a) sequential calculation of crc
>1.73 sec, minus reading of the data 0.17 sec leaves roughly 1.56 sec
>
>b) parallel calculation of the crc
>0.81 sec minus reading of the data 0.17 sec leaves roughly 0.64 sec
>
>sequential/parallel is 2.4
>
>I thought I would have a factor higher than 3, but maybe it's due to the fact that there are two points where the processing is serial, ie reading of the big buffer and merging the crc of the blocks
>
>
>For anyone interested - here's the code
>
>(1) Example of using the class
>(2) Enum of the polynomials
>(3) some constants
>(4) CRC32 - static part
>(5) CRC32 - rest
>
>_
>
>(1) Example of using the class
>
>			string filename;
>			filename = @"M:\VM\Machines\Vfp9_Sp1\Vfp9_Sp1_D.vmdk";
>
>			using (Stream s = File.OpenRead(filename))
>			{
>				var vv = new CRC32();
>				crc_sliced = 0;
>				crc_sliced = vv.GetCRC(s);
>				Console.WriteLine("{0} sliced", crc_sliced.ToString("X8"));
>			}
>
>
>(2) Enum of the polynomials
>
>	// must start from zero, no holes permitted
>	public enum CRCPolynomial
>	{
>		IEEE = 0,
>		Castagnoli = 1
>	}
>
>
>(3) some constants
>
>	public class BitSize
>	{
>		public const int BitsPerByte = 8;
>		public const int BitsPerByteShift = 3;
>
>		public const int Byte = (ByteSize.Byte << BitsPerByteShift);
>		public const int SByte = (ByteSize.SByte << BitsPerByteShift);
>
>		public const int Char = (ByteSize.Char << BitsPerByteShift);
>
>		public const int Int16 = (ByteSize.Int16 << BitsPerByteShift);
>		public const int UInt16 = (ByteSize.UInt16 << BitsPerByteShift);
>
>		public const int Int32 = (ByteSize.Int32 << BitsPerByteShift);
>		public const int UInt32 = (ByteSize.UInt32 << BitsPerByteShift);
>
>		public const int Int64 = (ByteSize.Int64 << BitsPerByteShift);
>		public const int UInt64 = (ByteSize.UInt64 << BitsPerByteShift);
>	}
>
>
>(4) CRC32 - static part
>
>#define USE_PARALLEL_TABLE_CONSTRUCTION
>	// Static part
>	public partial class CRC32
>	{
>		#region Polynomial Definition
>		#region Data
>		//_____________________________________________________________________
>		private static readonly UInt32[][] PolynomialsDefinition =
>			// do not change the sequence without changing  enum CRCPolynomial
>			{	// IEEE
>				new UInt32[] { 26, 23, 22, 16, 12, 11, 10, 8, 7, 5, 4, 2, 1, 0 },
>				
>				//Castagnoli
>				new UInt32[] { 28, 27, 26, 25, 23, 22, 20, 19, 18, 14, 13, 11, 10, 9, 8, 6, 0 }
>			};
>
>		// As many elements as enum CRCPolynomial
>		private static readonly UInt32[] ReversePolynomial = InitReversePolynomial();
>		//_____________________________________________________________________
>		#endregion Data
>		#region Methods
>		private static UInt32[] InitReversePolynomial()
>		{
>			UInt32[] reversePolynomial = new UInt32[PolynomialsDefinition.Length];
>
>			for (int i = 0; i <= PolynomialsDefinition.GetUpperBound(0); i++)
>			{
>
>				reversePolynomial[i] = GetReversePolynomial(PolynomialsDefinition[i]);
>			}
>			return reversePolynomial;
>		}
>		//______________________________________________________________________
>		private static UInt32 GetReversePolynomial(UInt32[] coef)
>		{
>			UInt32 polynomial = 0;
>			for (int i = 0; i <= coef.GetUpperBound(0); i++)
>				polynomial |= 1u << (int)(31u - coef[i]);
>
>			//Console.WriteLine("poly {0}", polynomial.ToString("X8"));
>			return polynomial;
>		}
>		//______________________________________________________________________
>		#endregion Methods
>		#endregion Polynomial Definition
>
>		
>		/*
>		 * Tables are grouped by 4, since this is a crc of 32 bits
>		 * Each set of 4 tables can 'multiply' or transform a crc a mumber of bytes to the right
>		 * 
>		 * There are two sets of tables, but basically they do the same
>		 *	- (1) used for hashing 
>		 *		see Slicing by 8 http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
>		 *		
>		 *	- (2) used to transform or multiply the crc of a block a number of bytes to the right
>		 */
>
>		/* Each polynomial has its own set of tables
>		 * Implemented here are IEEE and Castagnoli polynomials
>		 * The tables are static and are only calculated on first use
>		 */
>
>		
>		// Used to lock the access to the tables, they will be constructed the first time they are accessed
>		private static readonly object TableLocker = new object();
>
>		#region HashTables
>		// HashTables HashTables[CRCPolynomial.Index][tableId][elements 0-255]
>		private static readonly UInt32[][][] HashTables32 = new UInt32[PolynomialsDefinition.Length][][];
>
>		// Slicing by 16 algorithm - need 16 tables, each one = 1k, so 16k in total
>		private const uint HashTable32_Id_04 = 0;
>		private const uint HashTable32_Id_05 = 1;
>		private const uint HashTable32_Id_06 = 2;
>		private const uint HashTable32_Id_07 = 3;
>		private const uint HashTable32_Id_08 = 4;
>		private const uint HashTable32_Id_09 = 5;
>		private const uint HashTable32_Id_10 = 6;
>		private const uint HashTable32_Id_11 = 7;
>		private const uint HashTable32_Id_12 = 8;
>		private const uint HashTable32_Id_13 = 9;
>		private const uint HashTable32_Id_14 = 10;
>		private const uint HashTable32_Id_15 = 11;
>		private const uint HashTable32_Id_16 = 12;
>		private const uint HashTable32_Id_17 = 13;
>		private const uint HashTable32_Id_18 = 14;
>		private const uint HashTable32_Id_19 = 15;
>
>		private const uint HashTable32_Id_Min = HashTable32_Id_04;
>		private const uint HashTable32_Id_Max = HashTable32_Id_19;
>
>		//______________________________________________________________________
>		private static void GetTables32(
>			CRCPolynomial polyId,
>			out UInt32[][] hashTables32,
>			out UInt32[][] transformTables32
>			)
>		{
>			UInt32 polyIndex = (UInt32) polyId;
>			UInt32 reversePolynomial = ReversePolynomial[polyIndex];
>
>			lock (TableLocker)
>			{
>				hashTables32 = GetHashTables32(polyIndex);
>				transformTables32 = GetTransformTables32(polyIndex);
>			}
>		}
>		//______________________________________________________________________
>		//______________________________________________________________________
>		private static UInt32[][] GetHashTables32(UInt32 polyIndex)
>		{
>			UInt32[][] hashTables32;
>
>			if (HashTables32[polyIndex] != null)
>			{
>				hashTables32 = HashTables32[polyIndex];
>			}
>			else
>			{
>				hashTables32 = new UInt32[HashTable32_Id_Max + 1][];
>				HashTables32[polyIndex] = hashTables32;
>
>				UInt32 reversePolynomial = ReversePolynomial[polyIndex];
>
>
>				// Calculate the first table
>				hashTables32[HashTable32_Id_04] = MakeCRCTable(reversePolynomial, 4);
>
>				//Calculate the rest by shifting the previous one one byte to the right
>				for (uint index = HashTable32_Id_04; ++index <= HashTable32_Id_Max; )
>					hashTables32[index] = ShiftCRCTable(hashTables32[index - 1], reversePolynomial, 1);
>			}
>
>			return hashTables32;
>		}
>		//______________________________________________________________________
>		#endregion HashTables
>
>		//______________________________________________________________________
>		#region Thread
>
>		//______________________________________________________________________
>		#region Data
>
>
>		private static readonly int NumberOfBuffers =  Environment.ProcessorCount << 3;
>		private const int BufferSizeEach = 16384;
>		private static readonly int TotalBufferSize = BufferSizeEach * NumberOfBuffers;
>
>
>		// TransformTables32[CRCPolynomial.Index][tableId  0-3 ][elements 0-255]
>		private static readonly UInt32[][][] TransformTables32 = new UInt32[PolynomialsDefinition.Length][][];
>		//______________________________________________________________________
>		#endregion Data
>		//______________________________________________________________________
>		#region Methods
>		//______________________________________________________________________
>		private static UInt32[][] GetTransformTables32(uint polyIndex)
>		{
>			UInt32[][] transformTables32;
>
>			if (TransformTables32[polyIndex] != null)
>			{
>				transformTables32 = TransformTables32[polyIndex];
>			}
>			else
>			{
>				transformTables32 = new UInt32[4][];
>				TransformTables32[polyIndex] = transformTables32;
>
>				UInt32 reversePolynomial = ReversePolynomial[polyIndex];
>
>
>				// Calculate the first table - takes 50 - 60 ms
>				transformTables32[0] = MakeCRCTable(reversePolynomial, BufferSizeEach);
>
>				//Calculate the rest by shifting the previous one one byte to the right
>				for (uint index = 0; ++index <= 3; )
>					transformTables32[index] = ShiftCRCTable(transformTables32[index - 1], reversePolynomial, 1);
>			}
>
>			return transformTables32;
>		}
>		//______________________________________________________________________
>		#endregion Methods
>		//______________________________________________________________________
>		#endregion Thread
>		//______________________________________________________________________
>		//______________________________________________________________________
>		#region Table Common methods
>		//______________________________________________________________________
>		private static UInt32[] MakeCRCTable(UInt32 reversePolynomial, uint byteOffset)
>		{
>			if (byteOffset < 4)
>				throw new ArgumentOutOfRangeException("byteOffset", " >= 4");
>
>			UInt32[] table = new UInt32[byte.MaxValue + 1];
>			
>
>			// loop is Parallel candidate
>
>#if !USE_PARALLEL_TABLE_CONSTRUCTION
>			UInt32 remainder;
>			for (uint i = byte.MinValue; i <= byte.MaxValue; i++)
>			{
>				remainder = i;
>				for( int offset = (int)byteOffset - 4 + 1; --offset >= 0; )
>					remainder = ProcessRemainderByte(reversePolynomial, remainder);
>
>				table[i] = remainder;
>
>			}
>#else
>			Parallel.For(byte.MinValue, byte.MaxValue + 1, i =>
>				{
>					UInt32 remainder = unchecked((UInt32)i);
>					for (int offset = (int)byteOffset - 4 + 1; --offset >= 0; )
>						remainder = ProcessRemainderByte(reversePolynomial, remainder);
>
>					table[i] = remainder;
>
>				}
>			);
>#endif
>			return table;
>		}
>		//______________________________________________________________________
>		private static UInt32 ProcessRemainderByte(UInt32 reversePolynomial, UInt32 remainder)
>		{
>			for (int bits = BitSize.Byte; --bits >= 0; )
>				remainder = (remainder & (0x01u)) != 0 ? reversePolynomial ^ (remainder >> 1) : (remainder >> 1);
>
>			return remainder;
>		}
>		//______________________________________________________________________
>		private static UInt32[] ShiftCRCTable(UInt32[] tableIn, UInt32 reversePolynomial, UInt32 nBytes)
>		{
>			if (nBytes < 1)
>				throw new ArgumentOutOfRangeException("nBytes", " >= 1");
>
>			UInt32[] tableOut = new UInt32[tableIn.Length];
>
>			
>
>			// loop is Parallel candidate
>#if !USE_PARALLEL_TABLE_CONSTRUCTION
>			UInt32 remainder;
>			for (int i = tableIn.Length; --i >= 0; )
>			{
>				remainder = tableIn[i];
>				for( int count = (int) nBytes; --count >= 0; )
>					remainder = ProcessRemainderByte(reversePolynomial, remainder);
>
>				tableOut[i] = remainder;
>			}
>#else
>			Parallel.For(0, tableIn.Length, i =>
>				{
>					UInt32 remainder = unchecked(tableIn[(UInt32)i]);
>					for (int count = (int)nBytes; --count >= 0; )
>						remainder = ProcessRemainderByte(reversePolynomial, remainder);
>
>					tableOut[i] = remainder;
>				}
>			);
>#endif
>			return tableOut;
>		}
>		//______________________________________________________________________
>		#endregion Table Common methods
>
>	}
>
>
>(5) CRC32 - rest
>
>
>#define USE_PARALLEL_CRC_CALCULATION
>
>	public partial class CRC32
>	{
>		//______________________________________________________________________
>		// tables for hashing
>		private UInt32[] Hash_04;
>		private UInt32[] Hash_05;
>		private UInt32[] Hash_06;
>		private UInt32[] Hash_07;
>		private UInt32[] Hash_08;
>		private UInt32[] Hash_09;
>		private UInt32[] Hash_10;
>		private UInt32[] Hash_11;
>		private UInt32[] Hash_12;
>		private UInt32[] Hash_13;
>		private UInt32[] Hash_14;
>		private UInt32[] Hash_15;
>		private UInt32[] Hash_16;
>		private UInt32[] Hash_17;
>		private UInt32[] Hash_18;
>		private UInt32[] Hash_19;
>
>		// tables for Transformation
>		private UInt32[] Transform_4096_00;
>		private UInt32[] Transform_4096_01;
>		private UInt32[] Transform_4096_02;
>		private UInt32[] Transform_4096_03;
>
>		private const UInt32 CRC32InitValue = 0xffffffff;
>		//______________________________________________________________________
>		#region Constructors
>		//______________________________________________________________________
>		public CRC32()
>			: this(CRCPolynomial.IEEE)
>		{
>		}
>		//______________________________________________________________________
>		public CRC32(CRCPolynomial poly)
>			: base()
>		{
>			// get the tables
>			UInt32[][] hash, transform;
>
>			GetTables32(poly, out hash, out transform);
>
>			Hash_04 = hash[HashTable32_Id_04];
>			Hash_05 = hash[HashTable32_Id_05];
>			Hash_06 = hash[HashTable32_Id_06];
>			Hash_07 = hash[HashTable32_Id_07];
>			Hash_08 = hash[HashTable32_Id_08];
>			Hash_09 = hash[HashTable32_Id_09];
>			Hash_10 = hash[HashTable32_Id_10];
>			Hash_11 = hash[HashTable32_Id_11];
>
>			Hash_12 = hash[HashTable32_Id_12];
>			Hash_13 = hash[HashTable32_Id_13];
>			Hash_14 = hash[HashTable32_Id_14];
>			Hash_15 = hash[HashTable32_Id_15];
>			Hash_16 = hash[HashTable32_Id_16];
>			Hash_17 = hash[HashTable32_Id_17];
>			Hash_18 = hash[HashTable32_Id_18];
>			Hash_19 = hash[HashTable32_Id_19];
>
>			Transform_4096_00 = transform[0];
>			Transform_4096_01 = transform[1];
>			Transform_4096_02 = transform[2];
>			Transform_4096_03 = transform[3];
>
>		}
>		//______________________________________________________________________
>		#endregion
>		#region GetCRC
>		public Int32 GetCRC(Stream stream)
>		{
>			UInt32 crcCurrent = CRC32InitValue;
>
>			byte[] buf = new byte[TotalBufferSize];
>			UInt32[] bufferCRC = new UInt32[NumberOfBuffers];
>
>			int count;
>			while( (count = stream.Read(buf, 0, TotalBufferSize)) > 0 )
>			{
>				if (count < TotalBufferSize)
>				{
>					crcCurrent = HashBlock16(crcCurrent, buf, 0, count);
>					break;
>				}
>
>				GetBufferCRC(bufferCRC, buf);
>				crcCurrent = MergeCRCs(crcCurrent, bufferCRC);
>			}
>
>
>			return unchecked((Int32)~crcCurrent); 
>		}
>		//______________________________________________________________________
>		// parallel candidate
>		private void GetBufferCRC(UInt32[] threadCRC, byte[] buf)
>		{
>#if !USE_PARALLEL_CRC_CALCULATION
>
>			int offset = 0;
>			for (int i = 0; i < NumberOfBuffers; i++, offset += BufferSizeEach)
>			{
>				threadCRC[i] = HashBlock16(0, buf, offset, BufferSizeEach);
>			}
>#else
>			Parallel.For(0, NumberOfBuffers, i =>
>			{
>				int offset = i * BufferSizeEach;
>				threadCRC[i] = HashBlock16(0, buf, offset, BufferSizeEach);			
>			}
>			);
>
>#endif
>			
>		}
>		//______________________________________________________________________
>		/*
>		 */
>		private UInt32 MergeCRCs(UInt32 crcCurrent, UInt32[] bufferCRC)
>		{
>			for (int i = 0; i < NumberOfBuffers; i++ )
>				crcCurrent = TransformCRC(crcCurrent) ^ bufferCRC[i];
>
>			return crcCurrent;
>		}
>		//______________________________________________________________________
>		#endregion
>		#region Hashing
>		//______________________________________________________________________
>		// Slicing by 16
>		private UInt32 HashBlock16(UInt32 crc, byte[] buf, int offset, int count)
>		{
>
>			// head
>			for (int i = count >> 4; --i >= 0; offset += 16)
>			{
>				crc ^= ((UInt32)buf[offset + 3] << 24)
>						| ((UInt32)buf[offset + 2] << 16)
>						| ((UInt32)buf[offset + 1] << 8)
>						| ((UInt32)buf[offset]);
>
>				crc = Hash_19[crc & 0xFF]
>						^ Hash_18[(crc >> 8) & 0xFF]
>						^ Hash_17[(crc >> 16) & 0xFF]
>						^ Hash_16[(crc >> 24) ]
>						^ Hash_15[buf[offset + 4]]
>						^ Hash_14[buf[offset + 5]]
>						^ Hash_13[buf[offset + 6]]
>						^ Hash_12[buf[offset + 7]]
>						^ Hash_11[buf[offset + 8]]
>						^ Hash_10[buf[offset + 9]]
>						^ Hash_09[buf[offset + 10]]
>						^ Hash_08[buf[offset + 11]]
>						^ Hash_07[buf[offset + 12]]
>						^ Hash_06[buf[offset + 13]]
>						^ Hash_05[buf[offset + 14]]
>						^ Hash_04[buf[offset + 15]];
>			}
>
>			// tail
>			int tailCycles;
>
>			if ((tailCycles = count & 0x0f) != 0)
>				for (int i = tailCycles; --i >= 0; )
>					crc = Hash_04[(crc ^ (UInt32)buf[offset++]) & 0xFF] ^ (crc >> 8);
>
>			return crc;
>
>		}
>		//______________________________________________________________________
>		#endregion
>		#region TransformCRC
>		UInt32 TransformCRC(UInt32 crc)
>		{
>			return Transform_4096_03[crc & 0xFF]
>					^ Transform_4096_02[(crc >> 8) & 0xFF]
>					^ Transform_4096_01[(crc >> 16) & 0xFF]
>					^ Transform_4096_00[(crc >> 24)];
>		}
>		//______________________________________________________________________
>		#endregion
>	}
>
Previous
Next
Reply
Map
View

Click here to load this message in the networking platform