C# equivalent of this C hash function

I came across this C code which is a hashing algorithm for generating the same hash for similar strings:

/*
 * kaz_hash - hash a NUL-terminated string to a 32-bit value.
 *
 * Every input byte is mixed in twice. First the low four bits of
 * (byte + accumulator) pick one of the 16 table words, which is XORed into
 * the accumulator before it is rotated left by 1. Then the low four bits of
 * ((byte >> 4) + accumulator) pick a second word, followed by a rotate left
 * by 2.
 *
 * The accumulator is unsigned on purpose: with a signed accumulator the
 * left shift of a negative value is undefined and the right shift
 * sign-extends whenever `long` is 32 bits wide, so the shift/OR pairs stop
 * being rotations and the result depends on the platform. The explicit
 * 0xffffffff masks keep the value to 32 bits where `unsigned long` is wider.
 *
 * str:     NUL-terminated string to hash; must not be NULL.
 * returns: hash value in the range 0..0xffffffff (0 for the empty string).
 */
unsigned long kaz_hash(const char *str)
{
    static const unsigned long randbox[] = {
        0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
        0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
        0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
        0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
    };

    /* Read bytes as unsigned so the table index never depends on whether
     * plain char is signed. */
    const unsigned char *p = (const unsigned char *)str;
    unsigned long acc = 0;

    while (*p) {
        const unsigned long ch = *p++;

        acc ^= randbox[(ch + acc) & 0xf];
        acc = (acc << 1) | (acc >> 31);   /* rotate left by 1 */
        acc &= 0xffffffffU;
        acc ^= randbox[((ch >> 4) + acc) & 0xf];
        acc = (acc << 2) | (acc >> 30);   /* rotate left by 2 */
        acc &= 0xffffffffU;
    }
    return acc;
}

      

I am trying to use this in C#; this is what I came up with:

/// <summary>
/// Hashes a string by folding each UTF-16 code unit into a running value
/// with two table look-ups, each followed by a shift/OR step and a mask
/// to the low 32 bits.
/// </summary>
/// <param name="str">Text to hash; enumerated one <c>char</c> at a time.</param>
/// <returns>The accumulated value, converted to <c>ulong</c>.</returns>
public static ulong kaz_hash(string str) {
  ulong[] table = {
    0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
    0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
    0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
    0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
  };

  // 64-bit working value; it is masked back to 32 bits after every shift.
  long hash = 0;

  for (int i = 0; i < str.Length; i++) {
    long ch = str[i];

    // First half: slot chosen from the low four bits of (ch + hash).
    long slot = (ch + hash) & 0xf;
    hash ^= (long)table[slot];
    hash = (hash << 1) | (hash >> 31);
    hash &= 0xffffffffU;

    // Second half: slot chosen from the low four bits of ((ch >> 4) + hash).
    slot = ((ch >> 4) + hash) & 0xf;
    hash ^= (long)table[slot];
    hash = (hash << 2) | (hash >> 30);
    hash &= 0xffffffffU;
  }

  return (ulong)hash;
}

      

However, something is wrong: the two functions do not give the same results.

Edit

Thanks for all the comments and help. Finally, I was able to make this work. Below is a working version:

/// <summary>
/// Hashes a string by folding each UTF-16 code unit into a 32-bit
/// accumulator: two table look-ups per character, the first followed by a
/// shift/OR by 1 and the second by a shift/OR by 2.
/// </summary>
/// <param name="str">Text to hash; enumerated one <c>char</c> at a time.</param>
/// <returns>The accumulator's bit pattern reinterpreted as <c>uint</c>
/// (0 for an empty string).</returns>
public static uint kaz_hash(string str) {
  uint[] randbox = {
    0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
    0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
    0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
    0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
  };

  // The whole computation, including the final conversion, lives inside
  // unchecked: the accumulator is routinely negative, and converting it to
  // uint outside this block would throw OverflowException in a project
  // compiled with overflow checking enabled.
  unchecked {
    int acc = 0;

    foreach (int c in str) {
      // No "& 0xffffffff" steps are needed: acc is already exactly 32 bits,
      // so such a mask would be "acc &= -1", which changes nothing.
      //
      // NOTE(review): acc is a signed int, so >> sign-extends. Once bit 31
      // is set, (acc >> 31) is -1 and the OR yields -1 rather than a rotated
      // value. Confirm this is the behaviour this port is meant to match.
      acc ^= (int)randbox[(c + acc) & 0xf];
      acc = (acc << 1) | (acc >> 31);
      acc ^= (int)randbox[((c >> 4) + acc) & 0xf];
      acc = (acc << 2) | (acc >> 30);
    }

    return (uint)acc;
  }
}

      

+3


source to share


1 answer


I tried the code below with "abc", "abcd" and "abcde", and the C# code returns the same values as the C++ code:



    /// <summary>
    /// Hashes a string by folding each UTF-16 code unit into a 32-bit
    /// accumulator: two table look-ups per character, the first followed by
    /// a shift/OR by 1 and the second by a shift/OR by 2.
    /// </summary>
    /// <param name="str">Text to hash; enumerated one <c>char</c> at a time.</param>
    /// <returns>The signed 32-bit accumulator (0 for an empty string).</returns>
    public static int kaz_hash(string str)
    {
        UInt32[] randbox = {
        0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
        0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
        0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
        0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
        };

        int acc = 0;

        // Most table words exceed int.MaxValue, so the uint -> int casts
        // below must be unchecked; otherwise they throw OverflowException
        // in a project compiled with overflow checking enabled.
        unchecked
        {
            foreach (UInt32 c in str)
            {
                // c is uint and acc is int, so (c + acc) is evaluated as
                // long; only its low four bits are used as the table index.
                //
                // The former "acc &= -1" steps were no-ops and are dropped.
                //
                // NOTE(review): acc is a signed int, so >> sign-extends. Once
                // bit 31 is set, (acc >> 31) is -1 and the OR yields -1
                // rather than a rotated value. Confirm this is the behaviour
                // this port is meant to match.
                acc ^= (int)randbox[(c + acc) & 0xf];
                acc = (acc << 1) | (acc >> 31);
                acc ^= (int)randbox[((c >> 4) + acc) & 0xf];
                acc = (acc << 2) | (acc >> 30);
            }
        }
        return acc;
    }

      

0


source







All Articles