#!/usr/bin/perl -w # Compare data in two different databases # # 2003-08-07 Dobrica Pavlinusic # # somewhat based on ides from # DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com use strict; use Getopt::Long; use DBI; use Data::Dumper; use Pg::Scheme; $| = 1; my ($debug,$verbose) = (0,0); my ($help,$masterhost,$masterport,$masteruser,$masterpassword, $slavehost,$slaveport,$slaveuser,$slavepassword, $masterfile, $slavefile, ); my $tables; my $result = GetOptions( "debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help, "masterhost=s" => \$masterhost, "masterport=i" => \$masterport, "masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword, "masterfile=s" => \$masterfile, "slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport, "slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword, "slavefile=s" => \$slavefile, "tables=s" => \$tables, ); if (defined($help) || (scalar(@ARGV) < 2)) { print "Usage: $0 [options] masterdb slavedb Options: --masterhost=hostname --masterport=port --masteruser=username --masterpassword=string --masterfile=filename --slavehost=hostname --slaveport=port --slaveuser=username --slavepassword=string --slavefile=filename --tables[s]=table[,table...] "; # exit ((scalar(@ARGV) < 2)? 1:0); exit; } my $master = $ARGV[0] || "master"; my $slave = $ARGV[1] || "slave"; my $minfo = "dbname=$master"; $minfo = "$minfo host=$masterhost" if (defined($masterhost)); $minfo = "$minfo port=$masterport" if (defined($masterport)); my $sinfo = "dbname=$slave"; $sinfo = "$sinfo host=$slavehost" if (defined($slavehost)); $sinfo = "$sinfo port=$slaveport" if (defined($slaveport)); print "Master connection is $minfo\n" if ($debug); print "Slave connection is $sinfo\n" if ($debug); my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 0 } ); if (! $mdbh) { print "Can't connect to master database $master"; print "on $masterhost" if ($masterhost); print "\n"; exit 1; } my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 0 }); if (! $sdbh) { print "Can't connect to slave database $slave"; print "on $slavehost" if ($slavehost); print "\n"; exit 1; } my ($diff_shema,$diff_data) = (0,0); my $sql; sub debug_sql { return if (! $debug); my $sql = shift; $sql =~ s/[\n\r]/ /gs; $sql =~ s/\s\s+/ /g; print STDERR "DEBUG: SQL: $sql\n"; } sub debug_row { return if (! $debug); my $row = shift; my @cols = @_; if (! $row) { print STDERR "DEBUG: ROW data is undef!\n"; return; } print STDERR "DEBUG: ROW: [",$#cols+1,"] "; foreach my $col (@cols) { print STDERR "$col:"; if ($row->{$col}) { print $row->{$col}; } else { print "null"; } print " "; } print STDERR "\n"; } sub debug { return if (!$debug); print STDERR "DEBUG: ",@_; } $verbose = 1 if ($debug); # init object for scheme in master database my $mscheme = new Pg::Scheme( 'dbh' => $mdbh, 'DEBUG' => 0 ) || die "can't query master schema"; my $sscheme = new Pg::Scheme( 'dbh' => $sdbh, 'DEBUG' => 0 ) || die "can't query slave schema"; # which tables to compare? my @tables = $mscheme->list_tables($tables); debug "Comparing tables: ".join(", ",@tables)."\n"; # start transaction print "begin work;\n"; # disable active triggers on slave database my @triggers = $sscheme->get_triggers(); foreach my $tr (@triggers) { print "update pg_trigger set tgenabled = false where tgname='$tr';\n"; } my $cols; my $diff_total = 0; foreach my $table (@tables) { my $sth; # diff schema # all colums (for insert) my @cols = @{$mscheme->cols($table)}; # colums compared by a=b my @cols_notnull = @{$mscheme->cols_notnull($table)}; # colums compared by a=b or a is null and b is null my @cols_null = @{$mscheme->cols_null($table)}; # primary key columns my @cols_pk = @{$mscheme->cols_pk($table)}; # columns to compare (not in primary key) my @cols_cmp = @{$mscheme->cols_notpk($table)}; my @cols_skip; # skipped columns my @cols_test; # all colums to test (without skipped) foreach my $row (@{$mscheme->pg_attribute($table)}) { # attname | format_type | attnotnull | atthasdef | attnum # FIXME: do something with attributes which shouldn't be compared # (date, time, datetime, timestamp) if ($row->{format_type} =~ /(date)|(time)/i) { push @cols_skip,$row->{attname}; } else { push @cols_test,$row->{attname}; } } if ($debug) { print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")"; print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null); print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip); print STDERR "\n"; } # diff data if (! @cols_pk) { print STDERR "can't find PK rows for table '$table' using all\n"; @cols_pk = @cols; } if ($verbose) { print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n"; } # diff data my $msql = "select ".join(",",@cols)." from $table"; my $ssql = $msql; sub sql_where { my @cols = @_; my $and = ""; my $where = " where "; foreach my $col (@cols) { $where .= "$and$col=?"; $and = " and "; } return $where; } sub sql_order { my @cols = @_; my $order = " order by "; my $comma = ""; foreach my $col (@cols) { $order .= "$comma$col asc"; $comma = ", "; } return $order; } my $order = sql_order(@cols_pk); $msql .= $order; $ssql .= $order; debug_sql($msql); my $msth = $mdbh->prepare($msql) || die; $msth->execute() || die; my $ssth = $sdbh->prepare($ssql) || die; $ssth->execute() || die; my $diff_row = 0; my ($mrow,$srow); # have_* use constant NO_ROW => 0; use constant FETCH_ROW => 1; use constant HAVE_ROW => 2; my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW); while ($have_mrow != NO_ROW || $have_srow != NO_ROW) { debug "have mrow: $have_mrow srow: $have_srow\n"; sub pk_val { my $row = shift || die "need row"; my @cols = shift || die "need cols"; my @val; foreach my $col (@cols) { push @val,$row->{$col}; } return @val; } # fetch row from master if ($have_mrow == FETCH_ROW) { debug "fetch row from master: $msql\n"; $mrow = $msth->fetchrow_hashref(); debug_row($mrow,@cols); if ($mrow) { # fill-in primary key values $have_mrow = HAVE_ROW; } else { $have_mrow = NO_ROW; } } # fetch row from slave if ($have_srow == FETCH_ROW) { debug "fetch row from slave: $ssql\n"; $srow = $ssth->fetchrow_hashref(); debug_row($srow,@cols); if ($srow) { $have_srow = HAVE_ROW; } else { $have_srow = NO_ROW; } } debug "have mrow: $have_mrow srow: $have_srow\n"; # insert into slave database sub sql_insert { my $dbh = shift @_ || die "need dbh"; my $table = shift @_ || die "need table as argument"; my $row = shift @_ || die "need row data"; my @cols = @_; my $sql = "insert into $table (".join(",",@cols).") values ("; my $comma = ""; foreach my $col (@cols) { $sql .= $comma.$dbh->quote($row->{$col}); $comma = ","; } $sql.=")"; debug_sql($sql); return $sql; } # delete from slave database sub sql_delete { my $dbh = shift @_ || die "need dbh"; my $table = shift @_ || die "need table as argument"; my $row = shift @_ || die "need row as argument"; my @cols_pk = @_; my $where = sql_where(@cols_pk); my $sql = "delete from $table"; foreach my $col (@cols_pk) { my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col"; $where =~ s/\?/$val/; } $sql .= $where; debug_sql($sql); return $sql; } # update row in slave database sub sql_update { my $dbh = shift @_ || die "need dbh"; my $table = shift @_ || die "need table as argument"; my $col = shift @_ || die "need col to update"; my $row = shift @_ || die "need row"; my @cols_pk = @_; my $sql = "update $table set $col=".$dbh->quote($row->{$col}); my $where = sql_where(@cols_pk); foreach my $col (@cols_pk) { my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col"; $where =~ s/\?/$val/; } $sql .= $where; debug_sql($sql); return $sql; } # master slave # 1 = 1 test # 1 < 2 insert mrow # 2 > 1 delete srow # 1 = undef insert mrow # undef = 1 delete srow my $pk_same = 1; # check key cols for row foreach my $col (@cols_pk) { if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) || ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) { $diff_row++; $pk_same = 0; print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose); print sql_delete($sdbh,$table,$srow,@cols_pk),";\n"; $have_srow = FETCH_ROW; last; } elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) || ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) { $diff_row++; $pk_same = 0; print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose); print sql_insert($mdbh,$table,$mrow,@cols),";\n"; $have_mrow = FETCH_ROW; last; } } if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) { # check non-key cols for row foreach my $col (@cols_cmp) { if ($mrow->{$col} ne $srow->{$col}) { $diff_row++; print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose); print sql_update($mdbh,$table,$col,$mrow,@cols_pk),";\n"; } } $have_mrow = FETCH_ROW; $have_srow = FETCH_ROW; } } print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0); $diff_total += $diff_row; } if ($verbose) { if ($diff_total == 0) { print STDERR "databases are same"; } elsif ($diff_total > 0) { print STDERR "$diff_total differences in all tables\n"; } else { die "this shouldn't happend. please report a bug!"; } } # enable triggers again on slave foreach my $tr (@triggers) { print "update pg_trigger set tgenabled = true where tgname='$tr';\n"; } # end transaction print "commit;\n"; $mdbh->disconnect(); $sdbh->disconnect();