Wednesday, August 17, 2011

Merge java hashcode function to mysql5.1(UDF)


很多应用程序基于javahashcode方法,因此应用使用javahashcode算法,但是在数据库层需要做一些与应用层次相同的hash操作,例如数据迁移操作,但是目前mysql没有提供很好的hashcode的方法,因此将javahashcode算法整合到mysql5.1中去,作为mysqlhashcode函数使用,mysqlUDF函数代码如下:(在使用的过程中遇到问题,请给我留言): 

/*
* License: This code is in the public domain.
* java hash code function for mysql5.1.
*
* gcc -fPIC -Wall -I/home/oracle/mysql5.1.55/include/mysql -shared -o hashcode.so hashcode.cc
* cp hashcode.so  /home/oracle/mysql5.1.55/lib/mysql/plugin
* mysql -e "CREATE FUNCTION hashcode RETURNS INTEGER SONAME 'hashcode.so'"
*
* If you get the error "ERROR 1126 (HY000): Can't open shared library
* 'hashcode.so' (errno: 22  undefined symbol: 
* __gxx_personality_v0)" then you may need to use g++ instead of gcc.
*
* By:philip zhong
* email:philip.zhong1980@gmail.com
* blog:http://philipzhong.blogspot.com
* Share and Enjoy! :-)
*/


#include <my_global.h>
#include <my_sys.h>
#include <mysql.h>
#include <ctype.h>
#include <string.h>
#include <stdint.h>

/* On the first call, use this as the initial_value. */
#define HASH_INIT         0
/* Default for NULLs, just so the result is never NULL. */
#define HASH_NULL_DEFAULT 0

/* Prototypes */

extern "C" {
   uint32_t   hashcode_java(uchar* input, size_t len);
   my_bool    hashcode_init(UDF_INIT* initid, UDF_ARGS* args, char* message );
   uint32_t   hashcode(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error );
}

uint32_t  hashcode_java(uchar* input,size_t len)
{
    uint32_t hash = 0;
    size_t i;
    for (i = 0; i < len; i++) {
       hash = 31 * hash + input[i];
    }
    return hash;
}

my_bool
hashcode_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) {
   if (args->arg_count == 0 ) {
      strcpy(message,"hashcode requires at least one argument");
      return 1;
   }
   initid->maybe_null = 0;      /* The result will never be NULL */
   return 0;
}


uint32_t  hashcode( UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error ) {

   uint null_default = HASH_NULL_DEFAULT;
   uint32_t result  = HASH_INIT;
   uint i;

   for (i = 0 ; i < args->arg_count; ++i ) {
      if ( args->args[i] != NULL ) 
 {
          result= hashcode_java((uchar *)args->args[i], args->lengths[i]);
      }
      else {
          result= hashcode_java((uchar *)&null_default, sizeof(null_default));
      }
   }
   return result;
}

由于javahashcode出来的值是一个有符号的数字,因此插入数据库的时候,需要重写下javahashcode函数,java的源代码如下:

/**
* overload
* @param input
* @return
*/
public static long hashcode(String input) {

long MAX_VALUE = 0xffffffffL;
int hashvalue = 0;
int len = input.length();
for (int i = 0; i < len; i++) {
hashvalue = 31 * hashvalue + input.charAt(i);
}
if (hashvalue < 0)
return MAX_VALUE + hashvalue + 1;
else
return hashvalue;
}



Test cases:

String hashvalue = "zhaoxudong";
long hashcode = hashcode(hashvalue);
hashcode=3327663837

mysql> select hashcode("zhaoxudong");
+------------------------+
hashcode("zhaoxudong")
+------------------------+
3327663837
+------------------------+



String hashvalue = "A2ff7402d89c1086119bd7f17176ba66EEEEEEEEEEEEEEEEYYYYYYYY8888888888888888888888888888888888888888888888888888888";
long hashcode = hashcode(hashvalue);
hashcode=399008718

select hashcode("A2ff7402d89c1086119bd7f17176ba66EEEEEEEEEEEEEEEEYYYYYYYY8888888888888888888888888888888888888888888888888888888") as hash;
+-----------+
hash
+-----------+
399008718
+-----------+

No comments:

Post a Comment