Pergunta

I'm trying to create 2 UDFs for MySQL that will compress and decompress data, for further storing of it in database. With some help from the folks on this website I've created a function that successfully compresses and then decompresses data:

#include "string.h"
#include "stdio.h"
#include "mysql/mysql.h"
#include "zlib.h"
#include "stdlib.h"

int main ()
{


const char *istream = "test sentence";
ulong srcLen = strlen(istream)+1;
ulong destLen = compressBound(srcLen);
char* ostream = malloc(destLen);
int res = compress(ostream, &destLen, istream, srcLen);
if (res == Z_OK) printf("%s\n", ostream);
else printf("%i", res);


const char *data = ostream;
ulong size = strlen(data) + 1;
char *ret = malloc(size);
unsigned long new_size;

int rez;
//int i = 0;
int sz = 8*size;
new_size = sz = sz + 8 - (sz % 8);
for(;;)
{
    //fprintf(stderr,"%d[u]: %d\n",++i,(int)sz);
       ret = realloc(ret,sz);
       rez = uncompress(ret, &new_size, data, new_size);
       if(Z_BUF_ERROR == rez){
        sz*=2;
        new_size = sz;
        continue;
    }
    break;
}
if(Z_OK==rez){
ret = realloc(ret,new_size + 8 - (new_size % 8));
printf("%s\n", ret);
}
else printf("%i", rez);



return 0;
}

It works like a charm, so then I tried to recreate it in form of 2 UDFs for MySQL. Compression function:

#include "string.h"
#include "stdio.h"
#include "mysql/mysql.h"
#include "zlib.h"
#include "stdlib.h"
my_bool CompFunc_init( UDF_INIT *initid, UDF_ARGS *args, char *msg)
{
if (args->arg_count != 1)
{
    memcpy(msg, "Missing message argument.", 26);
    return 1;
}
if (args->arg_type[0] != STRING_RESULT)
{
    args->arg_type = STRING_RESULT;
}
initid->ptr = malloc(compressBound(strlen(args->args[0])) + 1);
return 0;
}
char *CompFunc( UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, my_bool *is_null, my_bool *error)
{
const char *istream = args->args[0];
ulong srcLen = strlen(istream)+1;
ulong destLen = compressBound(srcLen);
int res = compress(initid->ptr, &destLen, istream, srcLen);
if (res == Z_OK) {
    *length = destLen;
    sprintf(result,"%s", initid->ptr);
    return result;
}
else { sprintf(result, "%i", res); return result;}
}
void CompFunc_deinit(UDF_INIT *initid)
{
free(initid->ptr);
}

Decompression function:

#include "string.h"
#include "stdio.h"
#include "mysql/mysql.h"
#include "zlib.h"
#include "stdlib.h"

my_bool UCompFunc_init( UDF_INIT *initid, UDF_ARGS *args, char *msg)
{
if (args->arg_count != 1)
{
    memcpy(msg, "Missing message argument.", 26);
    return 1;
}
if (args->arg_type[0] != STRING_RESULT)
{
    args->arg_type = STRING_RESULT;
}
initid->ptr = malloc(strlen(args->args[0]) + 1);
return 0;
}
char *UCompFunc( UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, my_bool *is_null, my_bool *error)
{
const char *data = args->args[0];
ulong size = strlen(data) + 1;
unsigned long new_size;

int rez;
//int i = 0;
int sz = 8*size;
new_size = sz = sz + 8 - (sz % 8);
for(;;)
{
    //fprintf(stderr,"%d[u]: %d\n",++i,(int)sz);
        initid->ptr = realloc(initid->ptr,sz);
        rez = uncompress(initid->ptr, &new_size, data, new_size);
        if(Z_BUF_ERROR == rez){
        sz*=2;
        new_size = sz;
        continue;
    }
    break;
}
if(Z_OK==rez) initid->ptr = realloc(initid->ptr,new_size + 8 - (new_size % 8));
else { sprintf(result, "%i", rez); return result;}
*length = new_size;
sprintf(result,"%s", initid->ptr);
return result;
}
void UCompFunc_deinit(UDF_INIT *initid)
{
free(initid->ptr);
}

The compression UDF seems to work properly, but decompression UDF will not return the decompressed text, but an error of Z_BUF_ERROR, which means that there is something wrong with input buffer! Could anyone point out the mistake for me?

Foi útil?

Solução

  • You can't use strlen() to get the length of the compressed data. It is not terminated by a zero, and it very likely contains zeros within it. The result from strlen() might well be a very small number even if the compressed data is very long. The length of the compressed data is returned in the second argument of compress(). You should use that.
  • After sz = 8*whatever, you can be assured that sz % 8 is zero.
  • Reallocating the output buffer and retrying the entire decompression each time is a huge waste of time. You should use the inflateInit(), inflate(), and inflateEnd() functions instead, to decompress as much as you can into the available buffer, and then reallocate.
  • Better still would be to send the uncompressed length with the compressed data, so that you only have to do one allocation.
  • The fourth argument of uncompress() must be the length of the compressed data. Not the length of the output buffer repeated.
  • How can you ever return Z_BUF_ERROR given that you remain in an infinite loop if that is returned by uncompress()?

Bottom line: use uncompress() per the documentation.

Licenciado em: CC-BY-SA com atribuição
Não afiliado a StackOverflow
scroll top