Showing posts with label UDF. Show all posts
Showing posts with label UDF. Show all posts

Tuesday, August 30, 2011

Realize oracle to_char,to_number(UDF) on mysql5.1

很多时候,我们都想代码简单化,但是数据库的不同,这样的想法无法成行,最近在设计一个新项目的时候,想使得应用程序通用,既能够跑在oracle上,也能够跑在mysql上,因此将oracle的一些basic的函数在mysql上也实现了一把,本来想使用procedure实现,但这样的基础函数被应用频繁调用,使用procedure性能肯定是问题,因此改用mysql UDF,目前已经实现的函数有to_char,to_number.
源码如下:
to_char(source code)


/*
* License: This code is in the public domain.
* realize oracle to_char function for mysql5.1.
*
* gcc -fPIC -Wall -I/home/oracle/mysql5.1.55/include/mysql  -shared -o to_char.so to_char.cc
* cp to_char.so  /home/oracle/mysql5.1.55/lib/mysql/plugin
* mysql -e "CREATE FUNCTION to_char RETURNS STRING  SONAME 'to_char.so'"
*
* If you get the error "ERROR 1126 (HY000): Can't open shared library
* 'to_char.so' (errno: 22  undefined symbol:
* __gxx_personality_v0)" then you may need to use g++ instead of gcc.
*
* By:philip zhong
* email:philip.zhong1980@gmail.com
* blog:http://philipzhong.blogspot.com
* Share and Enjoy! :-)
*/
#include <my_global.h>
#include <my_sys.h>
#include <mysql.h>
#include <ctype.h>
#include <string.h>
#include <m_ctype.h>
#include <m_string.h>


void double2str(double in,char* out)  
{  
    double e=1e-8;  
    unsigned int i=0,j=0;  
    while(e<in)  
    {  
        e*=10;  
        i++;  
    }  
    while(e>1e-8 && j<8)  
    {   e/=10;  
        j++;i--;  
        if((char)(in/e+48)=='/')(*out++)=0;  
        else  
        (*out++)=(char)(in/e+48);  
        if(e==1.0)(*out++)='.';  
        in=in - ((int)(in/e))*e;  
    }  
    if(i>8)for(j=i;j>8;j--)(*out++)='0';  
    *out=0;  
}  

/* Prototypes */
extern "C" {
   my_bool  to_char_init(UDF_INIT* initid, UDF_ARGS* args, char* message );
   char     *to_char(UDF_INIT *initid, UDF_ARGS *args,char *result, unsigned long *length,char *is_null, char *error);
   void     to_char_deinit(UDF_INIT *initid);
}

//to_char
my_bool to_char_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) {
   if (args->arg_count == 0 || args->arg_count>1) {
      strcpy(message,"to_char requires at least one argument");
      return 1;
   }
   initid->const_item = 1;
   return 0;
}

//to_char
char *to_char(UDF_INIT *initid, UDF_ARGS *args,char *result, unsigned long *length,char *is_null, char *error)
{
  uint i;
  for (i = 0; i < args->arg_count; i++)
  {
    if (args->args[i] != NULL)
{
    switch (args->arg_type[i])
{
     case STRING_RESULT:
     case DECIMAL_RESULT:
{
   *length = args->lengths[i];
        strcpy(result, args->args[i]);
        return result;
     }
    case INT_RESULT:
{
  char int_buffer[65];
  long long int_val;
       int_val = *((long long*) args->args[i]);
  ullstr(int_val,int_buffer);
  *length = args->lengths[i];
  strcpy(result, int_buffer);
       return result;
}
    case REAL_RESULT:
{
   double real_val;
char real_buffer[80];
        real_val = *((double*) args->args[i]);
double2str(real_val,real_buffer);
       *length =args->lengths[i];
strcpy(result, real_buffer);
        return result;
}
    default:
      break;
    }
   }
 }
  return result;
}

void to_char_deinit(UDF_INIT *initid)
{
}

to_number(source code):

/*
* License: This code is in the public domain.
* merge oracle to_char, to_number functions into mysql5.1.
*
* gcc -fPIC -Wall -I/home/oracle/mysql5.1.55/include/mysql -shared -o to_number.so to_number.cc
* cp to_number.so  /home/oracle/mysql5.1.55/lib/mysql/plugin
* mysql -e "CREATE FUNCTION to_number RETURNS REAL  SONAME 'to_number.so'"
*
* If you get the error "ERROR 1126 (HY000): Can't open shared library
* 'to_number.so' (errno: 22  undefined symbol:
* __gxx_personality_v0)" then you may need to use g++ instead of gcc.
*
* By:philip zhong
* email:philip.zhong1980@gmail.com
* blog:http://philipzhong.blogspot.com
* Share and Enjoy! :-)
*/

#include <my_global.h>
#include <my_sys.h>
#include <mysql.h>
#include <string.h>
#include <m_string.h>

/* Mysql UDF defined functions */
extern "C" {
   my_bool  to_number_init(UDF_INIT* initid, UDF_ARGS* args, char* message );
   double   to_number(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error );
   void     to_number_deinit(UDF_INIT *initid);
}


//to_number,need add exception process.
my_bool to_number_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) {
   if (args->arg_count!=1) {
      strcpy(message,"to_number requires at least one argument");
      return 1;
   }
   initid->maybe_null = 0;      /* The result will never be NULL */
   return 0;
}

//to_number
double to_number( UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error ) {

    if (args->args[0] != NULL)
{
    switch (args->arg_type[0])
{
case DECIMAL_RESULT:
    case STRING_RESULT:
{
   return my_atof((char *)args->args[0]);
     }
case REAL_RESULT:
{
        return *((double*) args->args[0]);
}
    case INT_RESULT:
{
        return (double) *((long long*) args->args[0]);
}
    default:
      break;
    }
   }
   return 0.0;
}

void to_number_deinit(UDF_INIT *initid)
{
}



目前是beta版,大家在使用的过程中有bug,可以email给我。

下面是UDF与cast函数的性能测试结果比较:

mysql> select count(to_number(name)) from test_udf;
+------------------------+
| count(to_number(name)) |
+------------------------+
| 8188608 |
+------------------------+
1 row in set (2.78 sec)

mysql> select count(CAST(name as DECIMAL)) from test_udf;
+------------------------------+
| count(CAST(name as DECIMAL)) |
+------------------------------+
| 8188608 |
+------------------------------+
1 row in set (3.93 sec)

mysql> select count(to_char(name)) from test_udf t;
+----------------------+
| count(to_char(name)) |
+----------------------+
| 8188608 |
+----------------------+
1 row in set (3.81 sec)

mysql> select count(CAST(name as char(30))) from test_udf;
+-------------------------------+
| count(CAST(name as char(30))) |
+-------------------------------+
| 8188608 |
+-------------------------------+
1 row in set (4.45 sec)

Wednesday, August 17, 2011

Merge java hashcode function to mysql5.1(UDF)


很多应用程序基于javahashcode方法,因此应用使用javahashcode算法,但是在数据库层需要做一些与应用层次相同的hash操作,例如数据迁移操作,但是目前mysql没有提供很好的hashcode的方法,因此将javahashcode算法整合到mysql5.1中去,作为mysqlhashcode函数使用,mysqlUDF函数代码如下:(在使用的过程中遇到问题,请给我留言): 

/*
* License: This code is in the public domain.
* java hash code function for mysql5.1.
*
* gcc -fPIC -Wall -I/home/oracle/mysql5.1.55/include/mysql -shared -o hashcode.so hashcode.cc
* cp hashcode.so  /home/oracle/mysql5.1.55/lib/mysql/plugin
* mysql -e "CREATE FUNCTION hashcode RETURNS INTEGER SONAME 'hashcode.so'"
*
* If you get the error "ERROR 1126 (HY000): Can't open shared library
* 'hashcode.so' (errno: 22  undefined symbol: 
* __gxx_personality_v0)" then you may need to use g++ instead of gcc.
*
* By:philip zhong
* email:philip.zhong1980@gmail.com
* blog:http://philipzhong.blogspot.com
* Share and Enjoy! :-)
*/


#include <my_global.h>
#include <my_sys.h>
#include <mysql.h>
#include <ctype.h>
#include <string.h>
#include <stdint.h>

/* On the first call, use this as the initial_value. */
#define HASH_INIT         0
/* Default for NULLs, just so the result is never NULL. */
#define HASH_NULL_DEFAULT 0

/* Prototypes */

extern "C" {
   uint32_t   hashcode_java(uchar* input, size_t len);
   my_bool    hashcode_init(UDF_INIT* initid, UDF_ARGS* args, char* message );
   uint32_t   hashcode(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error );
}

uint32_t  hashcode_java(uchar* input,size_t len)
{
    uint32_t hash = 0;
    size_t i;
    for (i = 0; i < len; i++) {
       hash = 31 * hash + input[i];
    }
    return hash;
}

my_bool
hashcode_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) {
   if (args->arg_count == 0 ) {
      strcpy(message,"hashcode requires at least one argument");
      return 1;
   }
   initid->maybe_null = 0;      /* The result will never be NULL */
   return 0;
}


uint32_t  hashcode( UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error ) {

   uint null_default = HASH_NULL_DEFAULT;
   uint32_t result  = HASH_INIT;
   uint i;

   for (i = 0 ; i < args->arg_count; ++i ) {
      if ( args->args[i] != NULL ) 
 {
          result= hashcode_java((uchar *)args->args[i], args->lengths[i]);
      }
      else {
          result= hashcode_java((uchar *)&null_default, sizeof(null_default));
      }
   }
   return result;
}

由于javahashcode出来的值是一个有符号的数字,因此插入数据库的时候,需要重写下javahashcode函数,java的源代码如下:

/**
* overload
* @param input
* @return
*/
public static long hashcode(String input) {

long MAX_VALUE = 0xffffffffL;
int hashvalue = 0;
int len = input.length();
for (int i = 0; i < len; i++) {
hashvalue = 31 * hashvalue + input.charAt(i);
}
if (hashvalue < 0)
return MAX_VALUE + hashvalue + 1;
else
return hashvalue;
}



Test cases:

String hashvalue = "zhaoxudong";
long hashcode = hashcode(hashvalue);
hashcode=3327663837

mysql> select hashcode("zhaoxudong");
+------------------------+
hashcode("zhaoxudong")
+------------------------+
3327663837
+------------------------+



String hashvalue = "A2ff7402d89c1086119bd7f17176ba66EEEEEEEEEEEEEEEEYYYYYYYY8888888888888888888888888888888888888888888888888888888";
long hashcode = hashcode(hashvalue);
hashcode=399008718

select hashcode("A2ff7402d89c1086119bd7f17176ba66EEEEEEEEEEEEEEEEYYYYYYYY8888888888888888888888888888888888888888888888888888888") as hash;
+-----------+
hash
+-----------+
399008718
+-----------+

Thursday, August 11, 2011

Non-cryptographic hash function(UDF function) for mysql5.1

Some times, we want to use a hash funtion which is non-cryptographic,only create a number value,but mysql can't provide it, so I merge the "One At A Time hashing function" into mysql5.1,it  was originally created by Bob Jenkins.you can get my mysql hash function code from follow url:
http://forge.mysql.com/tools/tool.php?id=334

if you want use it in java and mysql,I modify the c code to java as following:

public static long hash32(String input) {
            try {
                  long hashend = 0L;
                  int hash = 0;
                  int hashbefore = 0;
                  long hashtemp = 0L;
                  for (byte bt : input.getBytes("utf-8")) {
                        hash += (bt & 0xFF);
                        hash += (hash << 10);
                        hash ^= (hash >>> 6);
                  }
                  hash += (hash << 3);
                  hash ^= (hash >>> 11);
                  hashbefore = hash;
                  hash += (hash << 15);
                  // finally result out rang of INT,
                  // so need long to save the data.
                  if (hash < 0) {
                        hashtemp = hashbefore << 15;
                        hashend = hashbefore + hashtemp;
                  } else
                        hashend = hash;
                  return hashend;
            } catch (Exception e) {
                  return 0;
            }
}

 following is java and mysql UDF testing result.

select hash32("A2ff7402d89c1086119bd7f17176ba66EEEEEEEEEEEEEEEEYYYYYYYY8888888888888888888888888888888888888888888888888888888") as hash32
+------------+
| hash32     |
+------------+
| 1143512335 |
+------------+

select hash32("1111111111111111111111111111111111111111") as hash32
+------------+
| hash32     |
+------------+
| 1867344944 |
+------------+

select hash32("aaabbbbbcccc1122228888888888888888888888") as hash32
+------------+
| hash32     |
+------------+
| 2697799865 |
+------------+

java:
hashvalue32=1143512335
hashvalue32=1867344944
hashvalue32=2697799865