// ddiff/dpatch - manipulate differences between debian packages.
// Copyright 2000 Tom Rothamel <tom-ddiff@onegeek.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  

#include <stdio.h>
#include <string.h>
#include "genfile.h"
#include "bdiff.h"

// This code handles the creation of binary diffs between files. One
// of the properties of the generated diffs is that the seek commands
// generated only increase... This makes patching the files very
// efficent in debdiff, where going back in a file tends to be
// rather expensive.

#define debug(x, y...) fprintf(stderr, "bdiff: " x "\n" , ## y )
#define BS 256
#define NCS_OVER 4

// This is a hack to convert from the original file-based version of
// this code to the new gfile-based version.
#define gfread(a, b, c, d) d->read(a, b, c)

struct Block {
	int valid;
	int start;

	unsigned char data[BS];
};

class BDiff {
	GFILE *fromf;
	GFILE *tof;
	FILE *outf;
	
	Block *b; // Blocks.
	int *csums; // Block checksums, stored separately for speed.
	int *ncs; // The number of blocks with a particular checksum.
	
	unsigned char to[BS];
	int tosplit;
	
	int biu; // Blocks in use.
	int blocks; // Blocks allocated.
	int nbs; // Next block's start
	int done; // Done reading blocks?
	int lbr; // Last byte read.

	int checksum(unsigned char *, int);
	int rechecksum(int, unsigned char, unsigned char);
	int compareb(unsigned char *);

	// From to.
	int readbyte();

	// From from.
	int getbyte();
	int readblock();
	void readblocks(int);

	Block *findblock(int);
	int onediff();
	void out(char);
	void outint(int);
	void outflag();
	void outbyte(char);
	
	
public:
	void diff(GFILE *, GFILE *, FILE *);

	BDiff(int);
	~BDiff();	
};

int BDiff::checksum(unsigned char *b1, int b1l) {
	int sum = 0;
	int i;

	for (i = 0; i < b1l; i++) {
		sum += b1[i];
	}

	return sum;
}

int BDiff::rechecksum(int sum, unsigned char out, unsigned char in) {
	sum -= out;
	sum += in;
	return sum;
}

int BDiff::compareb(unsigned char *buf) {
	int i;

	for (i = 0; i < BS; i++) {
		if (to[(tosplit + i) % BS] != buf[i]) return 0;
	}

	return 1;
}

int BDiff::readbyte() {
	unsigned char buf;

	if (!gfread(&buf, 1, 1, tof)) return -1;
	return buf;
}

int BDiff::readblock() {
	int lr;
	Block *bl;
	int bn;
	
	if (done) return 0;

	bn = (nbs / BS) % blocks;
	bl = &b[bn];

	if (bl->valid) {
		ncs[csums[bn] % (blocks * 4)]--;
	}
	
//	if (lbr < nbs - BS * (blocks - 1)) lbr += BS;

	lr = gfread(bl->data, 1, BS, fromf);

	if (lr != BS) {
		done = 1;
		bzero((void *) &(bl->data[lr]), BS - lr);
	}

	bl->valid = 1;
	bl->start = nbs;
	csums[bn] = checksum(bl->data, BS);
	ncs[csums[bn] % (blocks * 4)]++;

	
	if (biu < blocks) biu++;
	nbs += BS;
	return 1;
	
}

int BDiff::getbyte() {
	int loc = lbr;
	int off = loc % BS;
	int bn = (loc / BS) % blocks; 
	
	readblocks(loc);

	if (!b[bn].valid) return -1;	
	if (b[bn].start != loc - off) return -1;

	lbr++;

	return b[bn].data[off];
}
	

void BDiff::readblocks(int start) {
	int end;

	end = start + BS * (blocks - 1);
	while (nbs < end) {
		if (!readblock()) return;
	}
}

void BDiff::out(char c) {
	if (c == BDIFF_ENTRY) fputc(BDIFF_ENTRY, outf);
	fputc(c, outf);
}

void BDiff::outint(int n) {
	int i;
	int dba[] = {16777216, 65536, 256, 1};

	for (i = 0; i < 4; i++) {
		fputc((n / dba[i]), outf);
		n %= dba[i];
	}
}	

void BDiff::outflag() {
	fputc(BDIFF_ENTRY, outf);
}

void BDiff::outbyte(char c) {
	fputc(c, outf);
}

Block *BDiff::findblock(int cs) {
	int bln;
	int i;
	int bn;
	
	bln = lbr / 512;

	if (!ncs[cs % (blocks * 4)]) return NULL;
	
	for (i = 0; i < biu; i++) {
		bn = (bln + i) % blocks;

//		debug("bn %d valid = %d start = %d checksum = %d vs %d.",
//		      bn, b[bn].valid, b[bn].start, b[bn].checksum, cs);

		if (csums[bn] != cs) continue;
		
		if (!b[bn].valid) continue;
		if (b[bn].start < lbr) continue;

		if (compareb(b[bn].data)) return &b[bn];
	}

	return NULL;
}
	       
int BDiff::onediff() {
	int lr;
	int csum;
	int i;
	Block *bl;
	int nc;
	int toc;
	int fromc;
	int count;
	int bytesl;
	
	readblocks(lbr);

	// Try to read in a block of data from the to stream.
	lr = gfread(to, 1, BS, tof);
	tosplit = 0;

	// If we can't, output what we have and leave.
	if (lr != BS) {
		for(i = 0; i < lr; i++) {
			out(to[i]);
		}

		return 0;
	}

	// Otherwise, we try to match a block. If we can't find a match,
	// output one byte, rechecksum, and try again. If we run out of
	// file, output the block we have and go home. Otherwise, go
	// on.
	csum = checksum(to, BS);

	bytesl = blocks * BS;
	
	while (1) {

		// If we can't find a match within bytesl, we never will,
		// so give up.
		if (bytesl) {
			if ((bl = findblock(csum))) break;
			bytesl--;
		}
			
		nc = readbyte();
		if(nc == -1) {
			for (i = 0; i < BS; i++) {
				out(to[(tosplit + i) % BS]);
			}
			return 0;
		}
		
		out(to[tosplit]);

		csum = rechecksum(csum, to[tosplit], (unsigned char) nc);
		
		to[tosplit] = nc;
		tosplit++;
		tosplit %= BS;
	}

	lbr = bl->start + BS;
	count = BS;
	
	// Output a seek command. (This leaves us in command mode.)
	outflag();
	outbyte(BDIFF_CMD_SEEK4);
	outint(bl->start);

	// Now, figure out how many more characters are the same.
	while (1) {
		toc = readbyte();
		fromc = getbyte();

		if (toc == -1) break;
		if (fromc == -1) break;
		if (toc != fromc) break;
		count++;
	}

	// Output the count of bytes and the leave the command mode.
	outbyte(BDIFF_CMD_COPY4);
	outint(count);
	outflag();
	
	// If we haven't run out of bytes in to, we print the byte
	// that's lying around, and ask for more. Otherwise, we terminate.
	if (toc != -1) {
		out(toc);
		return 1;
	} else {
		return 0;
	}
}

void BDiff::diff(GFILE *f, GFILE *t, FILE *o) {
	int i;
	
	fromf = f;
	tof = t;
	outf = o;
	nbs = 0;
	biu = 0;
	done = 0;
	lbr = 0;
	
	for (i = 0; i < blocks; i++) {
		b[i].valid = 0;
	}

	while (onediff()) {}

	outflag();
	outbyte(BDIFF_CMD_DONE);
	
}
	
BDiff::BDiff(int n) {
	int i;

	fromf = NULL;
	tof = NULL;

	blocks = n;
	b = new Block[n];
	csums = new int[n];
	ncs = new int[n * NCS_OVER];	

	for(i = 0; i < n * NCS_OVER; i++) {
		ncs[i] = 0;
	}
}

BDiff::~BDiff() {
	delete[] b;
	delete[] csums;
	delete[] ncs;
}


// The following is the procedural-type interface to this code, for
// those of us who don't like objects or something.

static BDiff *bd = NULL;

// Initialization. Tells us to use blocks blocks to search for matches and
// stuff.
void bdiff_init(int blocks) {
	if (bd) delete bd;
	bd = new BDiff(blocks);
}

// Cleanup.
void bdiff_free() {
	if (bd) delete bd;
	bd = NULL;
}

// Perform a bdiff. bdiff_init (and not bdiff_free) must be called before
// this.
void bdiff(GFILE *from, GFILE *to, FILE *out) {
	if (!bd) return;
	bd->diff(from, to, out);
}

// int main(int argc, char **argv) {
// 	GFILE *from;
// 	GFILE *to;
// 	BDiff *dd;

// 	from = open_gfstdio(argv[1]);
// 	to = open_gfstdio(argv[2]);

// 	bdiff_init(256);
// 	bdiff(from, to, stdout);
// 	bdiff_free();

// 	exit(0);
// }
	
