#!/usr/bin/perl -w

# Compare data in two different databases
#
# 2003-08-07 Dobrica Pavlinusic
#
# somewhat based on ides from
# DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com

use strict;
use Getopt::Long;
use DBI;
use Data::Dumper;
use Pg::Scheme;

$| = 1;

my ($debug,$verbose) = (0,0);
my ($help,$masterhost,$masterport,$masteruser,$masterpassword,
	$slavehost,$slaveport,$slaveuser,$slavepassword,
	$masterfile,
	$slavefile,
	);
my $tables;

my $result = GetOptions(
	"debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help,
	"masterhost=s" => \$masterhost, "masterport=i" => \$masterport,
	"masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword,
	"masterfile=s" => \$masterfile,
	"slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport,
	"slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword,
	"slavefile=s" => \$slavefile,
	"tables=s" => \$tables,
	);

if (defined($help) || (scalar(@ARGV) < 2)) {
    print "Usage: $0 [options] masterdb slavedb
Options:
	--masterhost=hostname --masterport=port
	--masteruser=username --masterpassword=string
	--masterfile=filename
	--slavehost=hostname --slaveport=port
	--slaveuser=username --slavepassword=string
	--slavefile=filename
	--table[s]=table[,table...]
";
#    exit ((scalar(@ARGV) < 2)? 1:0);
    exit;
}

my $master = $ARGV[0] || "master";
my $slave = $ARGV[1] || "slave";

my $minfo = "dbname=$master";
$minfo = "$minfo host=$masterhost" if (defined($masterhost));
$minfo = "$minfo port=$masterport" if (defined($masterport));

my $sinfo = "dbname=$slave";
$sinfo = "$sinfo host=$slavehost" if (defined($slavehost));
$sinfo = "$sinfo port=$slaveport" if (defined($slaveport));

print "Master connection is $minfo\n" if ($debug);
print "Slave connection is $sinfo\n" if ($debug);

my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 0 } );
if (! $mdbh) {
	print "Can't connect to master database $master";
	print "on $masterhost" if ($masterhost);
	print "\n";
	exit 1;
}
my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 0 });
if (! $sdbh) {
	print "Can't connect to slave database $slave";
	print "on $slavehost" if ($slavehost);
	print "\n";
	exit 1;
}

my ($diff_shema,$diff_data) = (0,0);

my $sql;

sub debug_sql {
	return if (! $debug);
	my $sql = shift;
	$sql =~ s/[\n\r]/ /gs;
	$sql =~ s/\s\s+/ /g;
	print STDERR "DEBUG: SQL: $sql\n";
}

sub debug_row {
	return if (! $debug);
	my $row = shift;
	my @cols = @_;
	if (! $row) {
		print STDERR "DEBUG: ROW data is undef!\n";
		return;
	}
	print STDERR "DEBUG: ROW: [",$#cols+1,"] ";
	foreach my $col (@cols) {
		print STDERR "$col:";
		if ($row->{$col}) {
			print $row->{$col};
		} else {
			print "null";
		}
		print " ";
	}
	print STDERR "\n";
}

sub debug {
	return if (!$debug);
	print STDERR "DEBUG: ",@_;
}

$verbose = 1 if ($debug);

# init object for scheme in master database
my $mscheme = new Pg::Scheme( 'dbh' => $mdbh, 'DEBUG' => 0 ) || die "can't query master schema";
my $sscheme = new Pg::Scheme( 'dbh' => $sdbh, 'DEBUG' => 0 ) || die "can't query slave schema";

# which tables to compare?

my @tables = $mscheme->list_tables($tables);

debug "Comparing tables: ".join(", ",@tables)."\n";

# start transaction
print "begin work;\n";

# disable active triggers on slave database
my @triggers = $sscheme->get_activetriggers();

foreach my $tr (@triggers) {
	print "update pg_trigger set tgenabled = false where tgname='$tr';\n";
}

my $cols;
my $diff_total = 0;

foreach my $table (@tables) {

	my $sth;

print "-- schema...\n";
	# diff schema
	foreach my $row (@{$mscheme->pg_attribute($table)}) {
#		print Dumper($row);
	}

print "-- constraints...\n";
	# diff constraints
	foreach my $tr (@{$mscheme->triggers($table)}) {
#		print Dumper($tr);
	}
print "-- triggers...\n";
	# diff triggers
	foreach my $tr (@{$mscheme->triggers($table)}) {
#		print Dumper($tr);
	}

	# all columns (for insert)
	my @cols = @{$mscheme->cols($table)};

	# columns compared by a=b
	my @cols_notnull = @{$mscheme->cols_notnull($table)};

	# columns compared by a=b or a is null and b is null
	my @cols_null = @{$mscheme->cols_null($table)};

	# primary key columns
	my @cols_pk = @{$mscheme->cols_pk($table)};

	# columns to compare (not in primary key)
	my @cols_cmp = @{$mscheme->cols_notpk($table)};

	my @cols_skip;	# skipped columns
	my @cols_test;	# all columns to test (without skipped)

	foreach my $row (@{$mscheme->pg_attribute($table)}) {
		# attname format_type attnotnull atthasdef attnum default references

		# FIXME: do something with attributes which shouldn't be compared
		# (date, time, datetime, timestamp)
		if ($row->{format_type} =~ /(date)|(time)/i) {
			push @cols_skip,$row->{attname};
		} else {
			push @cols_test,$row->{attname};
		}

	}

	if ($debug) {
		print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")";
		print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null);
		print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip);
		print STDERR "\n";
	}

	# diff data

	if (! @cols_pk) {
		print STDERR "can't find PK rows for table '$table' using all\n";
		@cols_pk = @cols;
	}


	if ($verbose) {
		print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n";
	}

	# diff data

	my $msql = "select ".join(",",@cols)." from $table";
	my $ssql = $msql;

	sub sql_where {
		my @cols = @_;
		my $and = "";
		my $where = " where ";
		foreach my $col (@cols) {
			$where .= "$and$col=?";
			$and = " and ";
		}
		return $where;
	}

	sub sql_order {
		my @cols = @_;
		my $order = " order by ";
		my $comma = "";
		foreach my $col (@cols) {
			$order .= "$comma$col asc";
			$comma = ", ";
		}
		return $order;
	}

	my $order = sql_order(@cols_pk);
	$msql .= $order;
	$ssql .= $order;

	debug_sql($msql);

	my $msth = $mdbh->prepare($msql) || die $mdbh->errstr;
	$msth->execute() || die $msth->errstr;

	my $ssth = $sdbh->prepare($ssql) || die $sdbh->errstr;
	$ssth->execute() || die $ssth->errstr;

	my $diff_row = 0;

	my ($mrow,$srow);
	# have_*
	use constant NO_ROW => 0;
	use constant FETCH_ROW => 1;
	use constant HAVE_ROW => 2;
	my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW);

	while ($have_mrow != NO_ROW || $have_srow != NO_ROW) {

		debug "have mrow: $have_mrow srow: $have_srow\n";

		sub pk_val {
			my $row = shift || die "need row";
			my @cols = shift || die "need cols";
			my @val;
			foreach my $col (@cols) {
				push @val,$row->{$col};
			}
			return @val;
		}

		# fetch row from master
		if ($have_mrow == FETCH_ROW) {
			debug "fetch row from master: $msql\n";
			$mrow = $msth->fetchrow_hashref();
			debug_row($mrow,@cols);

			if ($mrow) {
				# fill-in primary key values
				$have_mrow = HAVE_ROW;
			} else {
				$have_mrow = NO_ROW;
			}
		}

		# fetch row from slave
		if ($have_srow == FETCH_ROW) {
			debug "fetch row from slave: $ssql\n";
			$srow = $ssth->fetchrow_hashref();
			debug_row($srow,@cols);
			if ($srow) {
				$have_srow = HAVE_ROW;
			} else {
				$have_srow = NO_ROW;
			}
		}

		debug "have mrow: $have_mrow srow: $have_srow\n";

		# insert into slave database
		sub sql_insert {
			my $dbh = shift @_ || die "need dbh";
			my $table = shift @_ || die "need table as argument";
			my $row = shift @_ || die "need row data";
			my @cols = @_;

			my $sql = "insert into $table (".join(",",@cols).") values (";
			my $comma = "";
			foreach my $col (@cols) {
				$sql .= $comma.$dbh->quote($row->{$col});
				$comma = ",";
			}
			$sql.=")";
			debug_sql($sql);
			return $sql;
		}

		# delete from slave database
		sub sql_delete {
			my $dbh = shift @_ || die "need dbh";
			my $table = shift @_ || die "need table as argument";
			my $row = shift @_ || die "need row as argument";
			my @cols_pk = @_;

			my $where = sql_where(@cols_pk);

			my $sql = "delete from $table";
			foreach my $col (@cols_pk) {
				my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col";
				$where =~ s/\?/$val/;
			}
			$sql .= $where;
			debug_sql($sql);
			return $sql;
		}

		# update row in slave database
		sub sql_update {
			my $dbh = shift @_ || die "need dbh";
			my $table = shift @_ || die "need table as argument";
			my $col = shift @_ || die "need col to update";
			my $row = shift @_ || die "need row";
			my @cols_pk = @_;

			my $sql = "update $table set $col=".$dbh->quote($row->{$col});
			my $where = sql_where(@cols_pk);
			foreach my $col (@cols_pk) {
				my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col";
				$where =~ s/\?/$val/;
			}
			$sql .= $where;
			debug_sql($sql);
			return $sql;
		}
		# master	slave
		# 1	=	1	test
		# 1	<	2	insert mrow
		# 2	>	1	delete srow
		# 1	=	undef	insert mrow
		# undef	=	1	delete srow

		my $pk_same = 1;

		# check key cols for row
		foreach my $col (@cols_pk) {
			if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) ||
				($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) {
				$diff_row++;
				$pk_same = 0;
				print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose);
				print sql_delete($sdbh,$table,$srow,@cols_pk),";\n";
				$have_srow = FETCH_ROW;
				last;
			} elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) ||
				($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) {
				$diff_row++;
				$pk_same = 0;
				print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose);
				print sql_insert($mdbh,$table,$mrow,@cols),";\n";
				$have_mrow = FETCH_ROW;
				last;
			}
		}

		if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) {
			# check non-key cols for row
			foreach my $col (@cols_cmp) {
				if ($mrow->{$col} ne $srow->{$col}) {
					$diff_row++;
					print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose);
					print sql_update($mdbh,$table,$col,$mrow,@cols_pk),";\n";
				}
			}
			$have_mrow = FETCH_ROW;
			$have_srow = FETCH_ROW;
		}
	}

	print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0);
	$diff_total += $diff_row;
}

if ($verbose) {
	if ($diff_total == 0) {
		print STDERR "databases are same\n";
	} elsif ($diff_total > 0) {
		print STDERR "$diff_total differences in all tables\n";
	} else {
		die "this shouldn't happend. please report a bug!";
	}
}

# enable triggers again on slave
foreach my $tr (@triggers) {
	print "update pg_trigger set tgenabled = true where tgname='$tr';\n";
}
# end transaction
print "commit;\n";

$mdbh->disconnect();
$sdbh->disconnect();
