Level Extreme platform
Subscription
Corporate profile
Products & Services
Support
Legal
Français
Crc32
Message
From
25/08/2011 04:55:11
 
 
To
16/08/2011 07:20:25
General information
Forum:
ASP.NET
Category:
Other
Title:
Re: Crc32
Environment versions
Environment:
C# 4.0
Miscellaneous
Thread ID:
01521077
Message ID:
01521739
Views:
93
Likes (1)
Here's a modified version which uses parallel calculation of the CRC

Tests done on a 544 Mb file. Once the data is warm (been read and in memory) it takes 0.17 sec to read the file

(1) Supports IEEE (same as vfp) and Castagnoli crc. Enum used to create the object, default is IEEE

(2) Use sliced by 16 instead of slicing by 8 (takes off 6-7 % of the time)

(3) Table construction
- tables are constructed when they are first used
- are static
- construction is partially parallel (17 ms instead of 50 ms)

(4) calculation of the crc

There's a loop that reads a huge buffer at a time

The buffer is logically divided into a number of blocks. The best performance I got was to take the number of logical cores multiplied by 8

Then, the CRC of each of the logical blocks is calculated in parallel

To calculate the CRC of the buffer, the CRCs of each of the logical blocks are merged ( took me a while to figure that one out)

(5) Timing ( I have a quad core, logical cores = 8)

Reading all the warm data takes 0.17 sec
Calculation of the tables in parallel is about 0.17 sec which I do not consider since I run the test twice and use only the second result

a) sequential calculation of crc
1.73 sec, minus reading of the data 0.17 sec leaves roughly 1.56 sec

b) parallel calculation of the crc
0.81 sec minus reading of the data 0.17 sec leaves roughly 0.64 sec

sequential/parallel is 2.4

I thought I would have a factor higher than 3, but maybe it's due to the fact that there are two points where the processing is serial, ie reading of the big buffer and merging the crc of the blocks


For anyone interested - here's the code

(1) Example of using the class
(2) Enum of the polynomials
(3) some constants
(4) CRC32 - static part
(5) CRC32 - rest

_

(1) Example of using the class
			string filename;
			filename = @"M:\VM\Machines\Vfp9_Sp1\Vfp9_Sp1_D.vmdk";

			using (Stream s = File.OpenRead(filename))
			{
				var vv = new CRC32();
				crc_sliced = 0;
				crc_sliced = vv.GetCRC(s);
				Console.WriteLine("{0} sliced", crc_sliced.ToString("X8"));
			}
(2) Enum of the polynomials
	// must start from zero, no holes permitted
	public enum CRCPolynomial
	{
		IEEE = 0,
		Castagnoli = 1
	}
(3) some constants
	public class BitSize
	{
		public const int BitsPerByte = 8;
		public const int BitsPerByteShift = 3;

		public const int Byte = (ByteSize.Byte << BitsPerByteShift);
		public const int SByte = (ByteSize.SByte << BitsPerByteShift);

		public const int Char = (ByteSize.Char << BitsPerByteShift);

		public const int Int16 = (ByteSize.Int16 << BitsPerByteShift);
		public const int UInt16 = (ByteSize.UInt16 << BitsPerByteShift);

		public const int Int32 = (ByteSize.Int32 << BitsPerByteShift);
		public const int UInt32 = (ByteSize.UInt32 << BitsPerByteShift);

		public const int Int64 = (ByteSize.Int64 << BitsPerByteShift);
		public const int UInt64 = (ByteSize.UInt64 << BitsPerByteShift);
	}
(4) CRC32 - static part
#define USE_PARALLEL_TABLE_CONSTRUCTION
	// Static part
	public partial class CRC32
	{
		#region Polynomial Definition
		#region Data
		//_____________________________________________________________________
		private static readonly UInt32[][] PolynomialsDefinition =
			// do not change the sequence without changing  enum CRCPolynomial
			{	// IEEE
				new UInt32[] { 26, 23, 22, 16, 12, 11, 10, 8, 7, 5, 4, 2, 1, 0 },
				
				//Castagnoli
				new UInt32[] { 28, 27, 26, 25, 23, 22, 20, 19, 18, 14, 13, 11, 10, 9, 8, 6, 0 }
			};

		// As many elements as enum CRCPolynomial
		private static readonly UInt32[] ReversePolynomial = InitReversePolynomial();
		//_____________________________________________________________________
		#endregion Data
		#region Methods
		private static UInt32[] InitReversePolynomial()
		{
			UInt32[] reversePolynomial = new UInt32[PolynomialsDefinition.Length];

			for (int i = 0; i <= PolynomialsDefinition.GetUpperBound(0); i++)
			{

				reversePolynomial[i] = GetReversePolynomial(PolynomialsDefinition[i]);
			}
			return reversePolynomial;
		}
		//______________________________________________________________________
		private static UInt32 GetReversePolynomial(UInt32[] coef)
		{
			UInt32 polynomial = 0;
			for (int i = 0; i <= coef.GetUpperBound(0); i++)
				polynomial |= 1u << (int)(31u - coef[i]);

			//Console.WriteLine("poly {0}", polynomial.ToString("X8"));
			return polynomial;
		}
		//______________________________________________________________________
		#endregion Methods
		#endregion Polynomial Definition

		
		/*
		 * Tables are grouped by 4, since this is a crc of 32 bits
		 * Each set of 4 tables can 'multiply' or transform a crc a mumber of bytes to the right
		 * 
		 * There are two sets of tables, but basically they do the same
		 *	- (1) used for hashing 
		 *		see Slicing by 8 http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
		 *		
		 *	- (2) used to transform or multiply the crc of a block a number of bytes to the right
		 */

		/* Each polynomial has its own set of tables
		 * Implemented here are IEEE and Castagnoli polynomials
		 * The tables are static and are only calculated on first use
		 */

		
		// Used to lock the access to the tables, they will be constructed the first time they are accessed
		private static readonly object TableLocker = new object();

		#region HashTables
		// HashTables HashTables[CRCPolynomial.Index][tableId][elements 0-255]
		private static readonly UInt32[][][] HashTables32 = new UInt32[PolynomialsDefinition.Length][][];

		// Slicing by 16 algorithm - need 16 tables, each one = 1k, so 16k in total
		private const uint HashTable32_Id_04 = 0;
		private const uint HashTable32_Id_05 = 1;
		private const uint HashTable32_Id_06 = 2;
		private const uint HashTable32_Id_07 = 3;
		private const uint HashTable32_Id_08 = 4;
		private const uint HashTable32_Id_09 = 5;
		private const uint HashTable32_Id_10 = 6;
		private const uint HashTable32_Id_11 = 7;
		private const uint HashTable32_Id_12 = 8;
		private const uint HashTable32_Id_13 = 9;
		private const uint HashTable32_Id_14 = 10;
		private const uint HashTable32_Id_15 = 11;
		private const uint HashTable32_Id_16 = 12;
		private const uint HashTable32_Id_17 = 13;
		private const uint HashTable32_Id_18 = 14;
		private const uint HashTable32_Id_19 = 15;

		private const uint HashTable32_Id_Min = HashTable32_Id_04;
		private const uint HashTable32_Id_Max = HashTable32_Id_19;

		//______________________________________________________________________
		private static void GetTables32(
			CRCPolynomial polyId,
			out UInt32[][] hashTables32,
			out UInt32[][] transformTables32
			)
		{
			UInt32 polyIndex = (UInt32) polyId;
			UInt32 reversePolynomial = ReversePolynomial[polyIndex];

			lock (TableLocker)
			{
				hashTables32 = GetHashTables32(polyIndex);
				transformTables32 = GetTransformTables32(polyIndex);
			}
		}
		//______________________________________________________________________
		//______________________________________________________________________
		private static UInt32[][] GetHashTables32(UInt32 polyIndex)
		{
			UInt32[][] hashTables32;

			if (HashTables32[polyIndex] != null)
			{
				hashTables32 = HashTables32[polyIndex];
			}
			else
			{
				hashTables32 = new UInt32[HashTable32_Id_Max + 1][];
				HashTables32[polyIndex] = hashTables32;

				UInt32 reversePolynomial = ReversePolynomial[polyIndex];


				// Calculate the first table
				hashTables32[HashTable32_Id_04] = MakeCRCTable(reversePolynomial, 4);

				//Calculate the rest by shifting the previous one one byte to the right
				for (uint index = HashTable32_Id_04; ++index <= HashTable32_Id_Max; )
					hashTables32[index] = ShiftCRCTable(hashTables32[index - 1], reversePolynomial, 1);
			}

			return hashTables32;
		}
		//______________________________________________________________________
		#endregion HashTables

		//______________________________________________________________________
		#region Thread

		//______________________________________________________________________
		#region Data


		private static readonly int NumberOfBuffers =  Environment.ProcessorCount << 3;
		private const int BufferSizeEach = 16384;
		private static readonly int TotalBufferSize = BufferSizeEach * NumberOfBuffers;


		// TransformTables32[CRCPolynomial.Index][tableId  0-3 ][elements 0-255]
		private static readonly UInt32[][][] TransformTables32 = new UInt32[PolynomialsDefinition.Length][][];
		//______________________________________________________________________
		#endregion Data
		//______________________________________________________________________
		#region Methods
		//______________________________________________________________________
		private static UInt32[][] GetTransformTables32(uint polyIndex)
		{
			UInt32[][] transformTables32;

			if (TransformTables32[polyIndex] != null)
			{
				transformTables32 = TransformTables32[polyIndex];
			}
			else
			{
				transformTables32 = new UInt32[4][];
				TransformTables32[polyIndex] = transformTables32;

				UInt32 reversePolynomial = ReversePolynomial[polyIndex];


				// Calculate the first table - takes 50 - 60 ms
				transformTables32[0] = MakeCRCTable(reversePolynomial, BufferSizeEach);

				//Calculate the rest by shifting the previous one one byte to the right
				for (uint index = 0; ++index <= 3; )
					transformTables32[index] = ShiftCRCTable(transformTables32[index - 1], reversePolynomial, 1);
			}

			return transformTables32;
		}
		//______________________________________________________________________
		#endregion Methods
		//______________________________________________________________________
		#endregion Thread
		//______________________________________________________________________
		//______________________________________________________________________
		#region Table Common methods
		//______________________________________________________________________
		private static UInt32[] MakeCRCTable(UInt32 reversePolynomial, uint byteOffset)
		{
			if (byteOffset < 4)
				throw new ArgumentOutOfRangeException("byteOffset", " >= 4");

			UInt32[] table = new UInt32[byte.MaxValue + 1];
			

			// loop is Parallel candidate

#if !USE_PARALLEL_TABLE_CONSTRUCTION
			UInt32 remainder;
			for (uint i = byte.MinValue; i <= byte.MaxValue; i++)
			{
				remainder = i;
				for( int offset = (int)byteOffset - 4 + 1; --offset >= 0; )
					remainder = ProcessRemainderByte(reversePolynomial, remainder);

				table[i] = remainder;

			}
#else
			Parallel.For(byte.MinValue, byte.MaxValue + 1, i =>
				{
					UInt32 remainder = unchecked((UInt32)i);
					for (int offset = (int)byteOffset - 4 + 1; --offset >= 0; )
						remainder = ProcessRemainderByte(reversePolynomial, remainder);

					table[i] = remainder;

				}
			);
#endif
			return table;
		}
		//______________________________________________________________________
		private static UInt32 ProcessRemainderByte(UInt32 reversePolynomial, UInt32 remainder)
		{
			for (int bits = BitSize.Byte; --bits >= 0; )
				remainder = (remainder & (0x01u)) != 0 ? reversePolynomial ^ (remainder >> 1) : (remainder >> 1);

			return remainder;
		}
		//______________________________________________________________________
		private static UInt32[] ShiftCRCTable(UInt32[] tableIn, UInt32 reversePolynomial, UInt32 nBytes)
		{
			if (nBytes < 1)
				throw new ArgumentOutOfRangeException("nBytes", " >= 1");

			UInt32[] tableOut = new UInt32[tableIn.Length];

			

			// loop is Parallel candidate
#if !USE_PARALLEL_TABLE_CONSTRUCTION
			UInt32 remainder;
			for (int i = tableIn.Length; --i >= 0; )
			{
				remainder = tableIn[i];
				for( int count = (int) nBytes; --count >= 0; )
					remainder = ProcessRemainderByte(reversePolynomial, remainder);

				tableOut[i] = remainder;
			}
#else
			Parallel.For(0, tableIn.Length, i =>
				{
					UInt32 remainder = unchecked(tableIn[(UInt32)i]);
					for (int count = (int)nBytes; --count >= 0; )
						remainder = ProcessRemainderByte(reversePolynomial, remainder);

					tableOut[i] = remainder;
				}
			);
#endif
			return tableOut;
		}
		//______________________________________________________________________
		#endregion Table Common methods

	}
(5) CRC32 - rest
#define USE_PARALLEL_CRC_CALCULATION

	public partial class CRC32
	{
		//______________________________________________________________________
		// tables for hashing
		private UInt32[] Hash_04;
		private UInt32[] Hash_05;
		private UInt32[] Hash_06;
		private UInt32[] Hash_07;
		private UInt32[] Hash_08;
		private UInt32[] Hash_09;
		private UInt32[] Hash_10;
		private UInt32[] Hash_11;
		private UInt32[] Hash_12;
		private UInt32[] Hash_13;
		private UInt32[] Hash_14;
		private UInt32[] Hash_15;
		private UInt32[] Hash_16;
		private UInt32[] Hash_17;
		private UInt32[] Hash_18;
		private UInt32[] Hash_19;

		// tables for Transformation
		private UInt32[] Transform_4096_00;
		private UInt32[] Transform_4096_01;
		private UInt32[] Transform_4096_02;
		private UInt32[] Transform_4096_03;

		private const UInt32 CRC32InitValue = 0xffffffff;
		//______________________________________________________________________
		#region Constructors
		//______________________________________________________________________
		public CRC32()
			: this(CRCPolynomial.IEEE)
		{
		}
		//______________________________________________________________________
		public CRC32(CRCPolynomial poly)
			: base()
		{
			// get the tables
			UInt32[][] hash, transform;

			GetTables32(poly, out hash, out transform);

			Hash_04 = hash[HashTable32_Id_04];
			Hash_05 = hash[HashTable32_Id_05];
			Hash_06 = hash[HashTable32_Id_06];
			Hash_07 = hash[HashTable32_Id_07];
			Hash_08 = hash[HashTable32_Id_08];
			Hash_09 = hash[HashTable32_Id_09];
			Hash_10 = hash[HashTable32_Id_10];
			Hash_11 = hash[HashTable32_Id_11];

			Hash_12 = hash[HashTable32_Id_12];
			Hash_13 = hash[HashTable32_Id_13];
			Hash_14 = hash[HashTable32_Id_14];
			Hash_15 = hash[HashTable32_Id_15];
			Hash_16 = hash[HashTable32_Id_16];
			Hash_17 = hash[HashTable32_Id_17];
			Hash_18 = hash[HashTable32_Id_18];
			Hash_19 = hash[HashTable32_Id_19];

			Transform_4096_00 = transform[0];
			Transform_4096_01 = transform[1];
			Transform_4096_02 = transform[2];
			Transform_4096_03 = transform[3];

		}
		//______________________________________________________________________
		#endregion
		#region GetCRC
		public Int32 GetCRC(Stream stream)
		{
			UInt32 crcCurrent = CRC32InitValue;

			byte[] buf = new byte[TotalBufferSize];
			UInt32[] bufferCRC = new UInt32[NumberOfBuffers];

			int count;
			while( (count = stream.Read(buf, 0, TotalBufferSize)) > 0 )
			{
				if (count < TotalBufferSize)
				{
					crcCurrent = HashBlock16(crcCurrent, buf, 0, count);
					break;
				}

				GetBufferCRC(bufferCRC, buf);
				crcCurrent = MergeCRCs(crcCurrent, bufferCRC);
			}


			return unchecked((Int32)~crcCurrent); 
		}
		//______________________________________________________________________
		// parallel candidate
		private void GetBufferCRC(UInt32[] threadCRC, byte[] buf)
		{
#if !USE_PARALLEL_CRC_CALCULATION

			int offset = 0;
			for (int i = 0; i < NumberOfBuffers; i++, offset += BufferSizeEach)
			{
				threadCRC[i] = HashBlock16(0, buf, offset, BufferSizeEach);
			}
#else
			Parallel.For(0, NumberOfBuffers, i =>
			{
				int offset = i * BufferSizeEach;
				threadCRC[i] = HashBlock16(0, buf, offset, BufferSizeEach);			
			}
			);

#endif
			
		}
		//______________________________________________________________________
		/*
		 */
		private UInt32 MergeCRCs(UInt32 crcCurrent, UInt32[] bufferCRC)
		{
			for (int i = 0; i < NumberOfBuffers; i++ )
				crcCurrent = TransformCRC(crcCurrent) ^ bufferCRC[i];

			return crcCurrent;
		}
		//______________________________________________________________________
		#endregion
		#region Hashing
		//______________________________________________________________________
		// Slicing by 16
		private UInt32 HashBlock16(UInt32 crc, byte[] buf, int offset, int count)
		{

			// head
			for (int i = count >> 4; --i >= 0; offset += 16)
			{
				crc ^= ((UInt32)buf[offset + 3] << 24)
						| ((UInt32)buf[offset + 2] << 16)
						| ((UInt32)buf[offset + 1] << 8)
						| ((UInt32)buf[offset]);

				crc = Hash_19[crc & 0xFF]
						^ Hash_18[(crc >> 8) & 0xFF]
						^ Hash_17[(crc >> 16) & 0xFF]
						^ Hash_16[(crc >> 24) ]
						^ Hash_15[buf[offset + 4]]
						^ Hash_14[buf[offset + 5]]
						^ Hash_13[buf[offset + 6]]
						^ Hash_12[buf[offset + 7]]
						^ Hash_11[buf[offset + 8]]
						^ Hash_10[buf[offset + 9]]
						^ Hash_09[buf[offset + 10]]
						^ Hash_08[buf[offset + 11]]
						^ Hash_07[buf[offset + 12]]
						^ Hash_06[buf[offset + 13]]
						^ Hash_05[buf[offset + 14]]
						^ Hash_04[buf[offset + 15]];
			}

			// tail
			int tailCycles;

			if ((tailCycles = count & 0x0f) != 0)
				for (int i = tailCycles; --i >= 0; )
					crc = Hash_04[(crc ^ (UInt32)buf[offset++]) & 0xFF] ^ (crc >> 8);

			return crc;

		}
		//______________________________________________________________________
		#endregion
		#region TransformCRC
		UInt32 TransformCRC(UInt32 crc)
		{
			return Transform_4096_03[crc & 0xFF]
					^ Transform_4096_02[(crc >> 8) & 0xFF]
					^ Transform_4096_01[(crc >> 16) & 0xFF]
					^ Transform_4096_00[(crc >> 24)];
		}
		//______________________________________________________________________
		#endregion
	}
Gregory
Previous
Next
Reply
Map
View

Click here to load this message in the networking platform