LZ77 slow compression speed

I'm writing a simple compression program using the LZ77 algorithm. My problem is very slow compression on any large file: a 2 MB image takes more than a minute with a lookahead buffer of 12 bytes and a 4096-byte dictionary. I use the Boyer-Moore-Horspool algorithm to search the dictionary for prefixes of the current buffer. Please tell me what could cause such a slowdown, and whether there are any ways to improve this code's performance.

// Find the longest prefix of the lookahead buffer b that occurs in the
// dictionary d, trying prefix lengths from longest to shortest and running
// a Boyer-Moore-Horspool scan over the dictionary for each length.
void findLongestMatch(unsigned char* d, unsigned char* b, short &len, short &off)
{
    short alphabet[256];   // Horspool shift table
    short shift = 0;
    short dict_pos = 0;
    bool found = false;
    if (cur_dict_length == 0) { return; }
    for (int prefix_length = cur_buf_length - 1; prefix_length >= 0; prefix_length--)
    {
        found = false;
        // Default shift: the full pattern length (prefix_length + 1).
        for (int j = 0; j < 256; j++)
        {
            alphabet[j] = prefix_length + 1;
        }
        // Shorter shifts for bytes that occur inside the pattern.
        for (int j = prefix_length; j >= 1; j--)
        {
            alphabet[(unsigned char)b[j]] = j;
        }
        shift = 0;
        // Scan the dictionary from its right end toward its start.
        dict_pos = cur_dict_length - (prefix_length + 1);
        while (dict_pos >= 0)
        {
            if (memcmp(&d[dict_pos], &b[0], prefix_length + 1) == 0)
            {
                found = true;
                len = prefix_length + 1;
                off = cur_dict_length - dict_pos;
                break;
            }
            shift = alphabet[(unsigned char)d[dict_pos]];
            dict_pos = dict_pos - shift;
        }
        if (found) break;   // longest possible match found; stop shortening the prefix
    }
    return;
}

// Encode a block s of block_size bytes as packed (offset, length, next) triples.
void compressData(long block_size, unsigned char* s, fstream &file_out)
{
    unsigned char buf_out[3];
    unsigned char* dict;
    unsigned char* buf;
    link myLink;
    file_out.seekp(0, ios_base::beg);
    cur_dict_length = 0;
    cur_buf_length = buf_length;
    for (int i = 0; i < block_size; i++)
    {
        // The lookahead buffer starts at the current position; the
        // dictionary is the cur_dict_length bytes immediately before it.
        buf = &s[i];
        dict = &s[i - cur_dict_length];
        myLink.length = 0; myLink.offset = 0;
        findLongestMatch(dict, buf, myLink.length, myLink.offset);
        if (myLink.length <= buf_length) myLink.next = buf[myLink.length];
        else myLink.next = s[i];
        compactLink(myLink, buf_out);   // pack the triple into 3 output bytes
        for (int j = 0; j < 3; j++)
        {
            file_out << buf_out[j];
        }
        // Skip past the matched bytes; the loop increment covers the literal.
        i = i + myLink.length;
        // Grow the dictionary until it reaches dict_length.
        if (cur_dict_length < dict_length) {
            cur_dict_length = cur_dict_length + 1 + myLink.length;
            if (cur_dict_length > dict_length) cur_dict_length = dict_length;
        }
        // Shrink the lookahead buffer near the end of the block.
        if (i + cur_buf_length >= block_size) cur_buf_length = cur_buf_length - 1 - (i + cur_buf_length - block_size);
    }
}

1 Answer

Mark Adler

As noted, it's the algorithm that is your issue. You can use a chained hash table, as deflate does, or a suffix tree.
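
For reference, here is a minimal sketch of the chained-hash idea (the scheme deflate uses): every position is linked into a chain of earlier positions that share the same hash of their next three bytes, so each search visits only plausible candidates instead of rescanning the whole dictionary once per prefix length. This is not Mark Adler's code or zlib's implementation; the constants, hash3(), and the MatchFinder interface are assumptions chosen to match the question's 4096-byte dictionary and 12-byte buffer, and a real implementation would also cap the chain walk as zlib does.

#include <algorithm>
#include <vector>

const int WINDOW_SIZE = 4096;   // dictionary size, matching the question
const int MAX_MATCH   = 12;     // lookahead buffer size, matching the question
const int MIN_MATCH   = 3;      // shortest match worth hashing
const int HASH_SIZE   = 1 << 12;

// Hash the next three bytes; any reasonable mixing function works here.
static int hash3(const unsigned char* p)
{
    return ((p[0] << 8) ^ (p[1] << 4) ^ p[2]) & (HASH_SIZE - 1);
}

struct MatchFinder
{
    std::vector<int> head;   // head[h]: most recent position whose 3-byte hash is h
    std::vector<int> prev;   // prev[i % WINDOW_SIZE]: previous position with the same hash

    MatchFinder() : head(HASH_SIZE, -1), prev(WINDOW_SIZE, -1) {}

    // Longest match for data[pos..] among the last WINDOW_SIZE positions.
    // Assumes positions are searched in order and insert() is called after
    // searching each position, so every chain entry is older than pos.
    void findLongestMatch(const unsigned char* data, int pos, int data_len,
                          short &len, short &off)
    {
        len = 0; off = 0;
        int limit = std::min(MAX_MATCH, data_len - pos);
        if (limit < MIN_MATCH) return;
        int h = hash3(&data[pos]);
        // Walk the chain of earlier positions sharing this hash, stopping
        // once a candidate falls outside the window.
        for (int cand = head[h]; cand >= 0 && cand >= pos - WINDOW_SIZE;
             cand = prev[cand % WINDOW_SIZE])
        {
            int l = 0;
            while (l < limit && data[cand + l] == data[pos + l]) l++;
            if (l > len) { len = (short)l; off = (short)(pos - cand); }
            if (l == limit) break;   // can't do better
        }
    }

    // Link position pos into its chain; call this for every byte consumed,
    // including the bytes covered by a match, so later searches can find them.
    void insert(const unsigned char* data, int pos, int data_len)
    {
        if (pos + MIN_MATCH > data_len) return;
        int h = hash3(&data[pos]);
        prev[pos % WINDOW_SIZE] = head[h];
        head[h] = pos;
    }
};

With this structure each search costs roughly the chain length rather than the dictionary size, which is where the speedup over a per-position Horspool scan comes from.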