--- paq9a.cpp.bak 2008-01-01 01:04:27.000000000 +0200 +++ paq9a.cpp 2008-05-02 01:16:54.105440556 +0300 @@ -235,10 +235,13 @@ #include #include #include +#include #define NDEBUG // remove for debugging #include -int allocated=0; // Total memory allocated by alloc() +size_t allocated=0; // Total memory allocated by alloc() + +#define ALIGN_CACHELINE (64) // Create an array p of n elements of type T template void alloc(T*&p, int n) { @@ -247,10 +250,9 @@ allocated+=n*sizeof(T); } -// 8, 16, 32 bit unsigned types (adjust as appropriate) -typedef unsigned char U8; -typedef unsigned short U16; -typedef unsigned int U32; +typedef uint8_t U8; +typedef uint16_t U16; +typedef uint32_t U32; ///////////////////////////// Squash ////////////////////////////// @@ -417,9 +419,9 @@ // update bit y (0..1) void update(int y, int limit=255) { assert(cxt>=0 && cxt>10; // count, prediction + int n = t[cxt]&1023, p = t[cxt]>>10; // count, prediction if (n>3)*dt[n]&0xfffffc00; } @@ -465,16 +467,16 @@ int pp(int p1, int p2, int cx) { assert(cx>=0 && cx>16)+(x2=p2)*(wt[cxt+1]>>16)+128>>8; + return pr = ((x1=p1) * (wt[cxt]>>16) + (x2=p2) * (wt[cxt+1]>>16)+128) >> 8; } void update(int y) { assert(y==0 || y==1); int err=((y<<12)-squash(pr)); if ((wt[cxt]&3)<3) - err*=4-(++wt[cxt]&3); - err=err+8>>4; - wt[cxt]+=x1*err&-4; - wt[cxt+1]+=x2*err; + err *= 4-(++wt[cxt]&3); + err = (err+8) >> 4; + wt[cxt] += (x1*err) & -4; + wt[cxt+1] += x2*err; } }; @@ -525,8 +527,8 @@ HashTable::HashTable(int n): t(0), N(n) { assert(B>=2 && (B&B-1)==0); assert(N>=B*4 && (N&N-1)==0); - alloc(t, N+B*4+64); - t+=64-int(((long)t)&63); // align on cache line boundary + alloc(t, N+B*4+ALIGN_CACHELINE); + t = (U8*) ((((uintptr_t)t) + ALIGN_CACHELINE - 1) & -ALIGN_CACHELINE); } template @@ -535,13 +537,13 @@ i=i<<16|i>>16; i*=234567891; int chk=i>>24; - i=i*B&N-B; - if (t[i]==chk) return t+i; - if (t[i^B]==chk) return t+(i^B); - if (t[i^B*2]==chk) return t+(i^B*2); - if (t[i+1]>t[i+1^B] || t[i+1]>t[i+1^B*2]) i^=B; - if 
(t[i+1]>t[i+1^B^B*2]) i^=B^B*2; - memset(t+i, 0, B); + i = (i * B) & (N - B); + if (t[i] == chk) return t + i; + if (t[i ^ B] == chk) return t + (i ^ B); + if (t[i ^ (B*2)] == chk) return t + (i ^ (B * 2)); + if (t[i + 1] > t[(i + 1) ^ B] || t[i + 1] > t[(i + 1) ^ (B*2)]) i ^= B; + if (t[ i +1] > t[(i + 1) ^ B ^ (B * 2)]) i ^= B ^ (B * 2); + memset(t + i, 0, B); t[i]=chk; return t+i; } @@ -625,36 +627,36 @@ // Predicted next byte, or -1 for no prediction inline int LZP::c() { - return len>=MINLEN ? buf[match&N-1] : -1; + return len >= MINLEN ? buf[match & (N-1)] : -1; } // Return i'th byte of context (i > 0) inline int LZP::c(int i) { assert(i>0); - return buf[pos-i&N-1]; + return buf[(pos-i) & (N-1)]; } // Return prediction that c() will be the next byte (0..4095) int LZP::p() { - if (len<MINLEN) return 0; - int cxt=len; - if (len>28) cxt=28+(len>=32)+(len>=64)+(len>=128); - int pc=c(); - int pr=sm1.p(cxt); - pr=stretch(pr); - pr=a1.pp(2048, pr*2, h2*256+pc&0xffff)*3+pr>>2; - pr=a2.pp(2048, pr*2, h1*(11<<6)+pc&0x3ffff)*3+pr>>2; - pr=a3.pp(2048, pr*2, h1*(7<<4)+pc&0xfffff)*3+pr>>2; - pr=squash(pr); + if (len < MINLEN) return 0; + int cxt = len; + if (len>28) cxt = 28 + (len >= 32) + (len >= 64) + (len >= 128); + int pc = c(); + int pr = sm1.p(cxt); + pr = stretch(pr); + pr = ((a1.pp(2048, pr*2, (h2*256 + pc) & 0xffff) * 3) + pr) >> 2; + pr = ((a2.pp(2048, pr*2, (h1*(11<<6) + pc) & 0x3ffff) * 3) + pr) >> 2; + pr = ((a3.pp(2048, pr*2, (h1*(7<<4) + pc) & 0xfffff) * 3) + pr) >> 2; + pr = squash(pr); return pr; } // Update model with predicted byte ch (0..255) void LZP::update(int ch) { - int y=c()==ch; // 1 if prediction of ch was right, else 0 - h1=h1*(3<<4)+ch+1; // update context hashes - h2=h2<<8|ch; - h=h*(5<<2)+ch+1&H-1; + int y = c()==ch; // 1 if prediction of ch was right, else 0 + h1 =h1 * (3<<4) +ch +1; // update context hashes + h2 = h2 << 8 | ch; + h = ((h * (5<<2)) + (ch + 1)) & (H - 1); if (len>=MINLEN) { sm1.update(y); a1.update(y); @@ -665,7 +667,7 @@ word0=word0*(29<<2)+tolower(ch); else if 
(word0) word1=word0, word0=0; - buf[pos&N-1]=ch; // update buf + buf[pos & (N-1)]=ch; // update buf ++pos; if (y) { // extend match ++len; @@ -677,8 +679,8 @@ y=0; len=1; match=t[h]; - if (!((match^pos)&N-1)) --match; - while (len<=128 && buf[match-len&N-1]==buf[pos-len&N-1]) ++len; + if (!((match^pos)&(N-1))) --match; + while (len<=128 && buf[(match-len) & (N-1)] == buf[(pos-len) & (N-1)]) ++len; --len; } t[h]=pos; @@ -756,7 +758,7 @@ // Set context pointers int pc=lzp->c(); // mispredicted byte - int r=pc+256>>8-bcount==c0; // c0 consistent with mispredicted byte? + int r = ((pc + 256) >> (8 - bcount)) == c0; // c0 consistent with mispredicted byte? U32 c4=lzp->c4(); // last 4 whole context bytes, shifted into LSB U32 c8=(lzp->c8()<<4)-1; // hash of last 7 bytes with 4 trailing 1 bits if ((bcount&3)==0) { // nibble boundary? Update context pointers @@ -784,11 +786,11 @@ for (int i=1; i>2; + pr = ((m[i-1].pp(pr, stretch(sm[i].p(st)), st+r) * 3) + pr) >> 2; } - pr=a1.pp(512, pr*2, c0+pc*256&0xffff)*3+pr>>2; // Adjust prediction - pr=a2.pp(512, pr*2, c4<<8&0xff00|c0)*3+pr>>2; - pr=a3.pp(512, pr*2, c4*3+c0&0xffff)*3+pr>>2; + pr=((a1.pp(512, pr*2, (c0 + (pc*256)) & 0xffff) * 3) + pr) >> 2; // Adjust prediction + pr=((a2.pp(512, pr*2, ((c4 << 8) & 0xff00) | c0) * 3) + pr) >> 2; + pr=((a3.pp(512, pr*2, ((c4 * 3) + c0) & 0xffff) * 3) + pr) >> 2; return squash(pr); } } @@ -845,7 +847,7 @@ int p=predictor->p(); assert(p>=0 && p<4096); p+=p<2048; - U32 xmid=x1 + (x2-x1>>12)*p + ((x2-x1&0xfff)*p>>12); + U32 xmid = x1 + ((x2 - x1) >> 12) * p + ((((x2 - x1) & 0xfff) * p) >> 12); assert(xmid>=x1 && xmid>10); + printf("%8zu KiB\b\b\b\b\b\b\b\b\b\b\b\b", allocated>>10); } if (mode==COMPRESS) { Encoder e(COMPRESS, out); @@ -1021,7 +1023,7 @@ } // Compress filename to out. option is 'c' to compress or 's' to store. 
-void compress(const char* filename, FILE* out, int option) { +void compress_file(const char* filename, FILE* out, int option) { // Open input file FILE* in=fopen(filename, "rb"); @@ -1041,7 +1043,7 @@ } // List archive contents -void list(const char* archive) { +void list_archive(const char* archive) { double usum=0, csum=0; // uncompressed and compressed size per file double utotal=0, ctotal=4; // total size in archive static char filename[MAXNAMELEN+1]; @@ -1077,7 +1079,7 @@ int csize=get4(in); csum+=csize+10; - if (usize<0 || csize<0 || mode!='c' && mode!='s') + if (usize<0 || csize<0 || (mode!='c' && mode!='s')) printf("Archive corrupted usize=%d csize=%d mode=%d at %ld\n", usize, csize, mode, ftell(in)), exit(1); @@ -1097,14 +1099,14 @@ // Extract files given command line arguments // Input format is: [filename {'\0' mode usize csize contents}...]... -void extract(int argc, char** argv) { +void extract_archive(int argc, char** argv) { assert(argc>2); assert(argv[1][0]=='x'); static char filename[MAXNAMELEN+1]; // filename from archive // Open archive FILE* in=open_archive(argv[2]); - MEM=1<<22+MEM-'0'; + MEM = 1 << (22 + MEM - '0'); // Extract files argc-=3; @@ -1188,7 +1190,7 @@ if (argc>3 && argv[3][0]=='-' && argv[3][1]>='1' && argv[3][1]<='9' && argv[3][2]==0) { putc(argv[3][1], out); - MEM=1<<22+argv[3][1]-'0'; + MEM = 1 << (22 + argv[3][1] - '0'); ++i; } else @@ -1198,7 +1200,7 @@ && argv[i][2]==0) option=argv[i][1]; else - compress(argv[i], out, option); + compress_file(argv[i], out, option); } printf("-> %ld in %1.2f sec\n", ftell(out), double(clock()-start)/CLOCKS_PER_SEC); @@ -1206,17 +1208,17 @@ // List archive contents else if (argv[1][0]=='l') - list(argv[2]); + list_archive(argv[2]); // Extract from archive else if (argv[1][0]=='x') { - extract(argc, argv); + extract_archive(argc, argv); printf("%1.2f sec\n", double(clock()-start)/CLOCKS_PER_SEC); } // Report statistics delete predictor; delete lzp; - printf("Used %d KiB memory\n", 
allocated>>10); + printf("Used %zu KiB memory\n", allocated>>10); return 0; }