#!/usr/bin/perl -w
#
# psinib - Perl Snapshot Is Not Incremental Backup
#
# written by Dobrica Pavlinusic <dpavlin@rot13.org> 2003-01-03
# released under GPL v2 or later.
# 
# Backup SMB directories using file produced by LinNeighbourhood (or some
# other program [vi :-)] which produces file in format:
#
# smbmount service mountpoint options
#
#
# usage:
# 	$ psinib.pl mountscript

use strict 'vars';
use Data::Dumper;
use Net::Ping;
use POSIX qw(strftime);
use List::Compare;
use Filesys::SmbClient;
#use Taint;
use Fcntl qw(LOCK_EX LOCK_NB);
use Digest::MD5;
use File::Basename;
use DB_file;

# configuration
my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S';	# strftime format for logfile
my $DIR_TIME_FMT = '%Y%m%d';		# strftime format for backup dir

my $LOG = '/var/log/backup.log';	# add path here...
#$LOG = '/tmp/backup.log';

# store backups in which directory
my $BACKUP_DEST = '/backup/isis_backup';

# files to ignore in backup
my @ignore = ('.md5sum', '.md5db', '.backupignore', 'backupignore.txt');

# open log
open(L, ">> $LOG") || die "can't open log $LOG: $!";
select((select(L), $|=1)[0]);	# flush output

# make a lock on logfile

my $c = 0;
{
	flock L, LOCK_EX | LOCK_NB and last;
	sleep 1;
	redo if ++$c < 10;
	# no response for 10 sec, bail out
	print STDERR "can't take lock on $LOG -- another $0 running?\n";
	exit 1;
}

# taint path: nmblookup should be there!
$ENV{'PATH'} = "/usr/bin:/bin";

my $mounts = shift @ARGV ||
	'mountscript';
#	die "usage: $0 mountscript";


my @in_backup;	# shares which are backeduped this run

my $p = new Net::Ping->new("tcp", 2);
# ping will try tcp connect to netbios-ssn (139)
$p->{port_num} = getservbyname("netbios-ssn", "tcp");

my $backup_ok = 0;

my $smb;
my %smb_atime;
my %smb_mtime;
my %file_md5;

open(M, $mounts) || die "can't open $mounts: $!";
while(<M>) {
	chomp;
	next if !/^\s*smbmount\s/;
	my (undef,$share,undef,$opt) = split(/\s+/,$_,4);

	my ($user,$passwd,$workgroup);

	foreach (split(/,/,$opt)) {
		my ($n,$v) = split(/=/,$_,2);
		if ($n =~ m/username/i) {
			if ($v =~ m#^(.+)/(.+)%(.+)$#) {
				($user,$passwd,$workgroup) = ($1,$2,$3);
			} elsif ($v =~ m#^(.+)/(.+)$#) {
				($user,$workgroup) = ($1,$2);
			} elsif ($v =~ m#^(.+)%(.+)$#) {
				($user,$passwd) = ($1,$2);
			} else {
				$user = $v;
			}
		} elsif ($n =~ m#workgroup#i) {
			$workgroup = $v;
		}
	}

	push @in_backup,$share;


	my ($host,$dir,$date_dir) = share2host_dir($share);
	my $bl = "$BACKUP_DEST/$host/$dir/latest";	# latest backup
	my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";	# current one
	my $real_bl;
	if (-l $bl) {
		$real_bl=readlink($bl) || die "can't read link $bl: $!";
		$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
		if (-l $bc && $real_bl eq $bc) {
			print "$share allready backuped...\n";
			$backup_ok++;
			next;
		}

	}


	print "working on $share\n";


	my $ip = get_ip($share);

	if ($ip) {
		xlog($share,"IP is $ip");
		if ($p->ping($ip)) {
			snap_share($share,$user,$passwd,$workgroup);
			$backup_ok++;
		}
	}
}
close(M);

xlog("","$backup_ok backups completed of total ".($#in_backup+1)." this time (".int($backup_ok*100/($#in_backup+1))." %)");

1;

#-------------------------------------------------------------------------


# get IP number from share
sub get_ip {
	my $share = shift;

	my $host = $1 if ($share =~ m#//([^/]+)/#);

	my $ip = `nmblookup $host`;
	if ($ip =~ m/(\d+\.\d+\.\d+\.\d+)\s$host/i) {
		return $1;
	}
}


# write entry to screen and log
sub xlog {
	my $share = shift;
	my $t = strftime $LOG_TIME_FMT, localtime;
	my $m = shift || '[no log entry]';
	print STDERR $m,"\n";
	print L "$t $share\t$m\n";
}

# dump warn and dies into log
BEGIN { $SIG{'__WARN__'} = sub { xlog('WARN',$_[0]) ; warn $_[0] } }
BEGIN { $SIG{'__DIE__'} = sub { xlog('DIE',$_[0]) ; die $_[0] } }


# split share name to host, dir and currnet date dir
sub share2host_dir {
	my $share = shift;
	my ($host,$dir);
	if ($share =~ m#//([^/]+)/(.+)$#) {
		($host,$dir) = ($1,$2);
		$dir =~ s/\W/_/g;
		$dir =~ s/^_+//;
		$dir =~ s/_+$//;
	} else {
		print "Can't parse share $share into host and directory!\n";
		return;
	}
	return ($host,$dir,strftime $DIR_TIME_FMT, localtime);
}


# make a snapshot of a share
sub snap_share {

	my $share = shift;

	my %param = ( debug => 0 );

	$param{username} = shift || warn "can't find username for share $share";
	$param{password} = shift || warn "can't find passwod for share $share";
	$param{workgroup} = shift || warn "can't find workgroup for share $share";

	my ($host,$dir,$date_dir) = share2host_dir($share);

	# latest backup directory
	my $bl = "$BACKUP_DEST/$host/$dir/latest";
	# current backup directory
	my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";

	my $real_bl;
	if (-l $bl) {
		$real_bl=readlink($bl) || die "can't read link $bl: $!";
		$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
	} else {
		print "no old backup, trying to find last backup, ";
		if (opendir(BL_DIR, "$BACKUP_DEST/$host/$dir")) {
			my @bl_dirs = sort grep { !/^\./ && -d "$BACKUP_DEST/$host/$dir/$_" } readdir(BL_DIR);
			closedir(BL_DIR);
			$real_bl=pop @bl_dirs;
			print "using $real_bl as latest...\n";
			$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
			if ($real_bl eq $bc) {
				xlog($share,"latest from today (possible partial backup)");
				rename $real_bl,$real_bl.".partial" || warn "can't reaname partial backup: $!";
				$real_bl .= ".partial";
			}
		} else {
			print "this is first run...\n";
		}
	}

	if (-l $bc && $real_bl && $real_bl eq $bc) {
		print "$share allready backuped...\n";
		return;
	}

	die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST);

	if (! -e "$BACKUP_DEST/$host") {
		mkdir "$BACKUP_DEST/$host" || die "can't make dir for host $host, $BACKUP_DEST/$host: $!";
		print "created host directory $BACKUP_DEST/$host...\n";
	}

	if (! -e "$BACKUP_DEST/$host/$dir") {
		mkdir "$BACKUP_DEST/$host/$dir" || die "can't make dir for share $share, $BACKUP_DEST/$host/$dir $!";
		print "created dir for share $share, $BACKUP_DEST/$host/$dir...\n";
	}

	mkdir $bc || die "can't make dir for current backup $bc: $!";

	my @dirs = ( "/" );
	my @smb_dirs = ( "/" );

	my $transfer = 0;	# bytes transfered over network

	# this will store all available files and sizes
	my @files;
	my %file_size;
	my %file_atime;
	my %file_mtime;
	#my %file_md5;

	my @smb_files;
	my %smb_size;
	#my %smb_atime;
	#my %smb_mtime;

	sub norm_dir {
		my $foo = shift;
		my $prefix = shift;
		$foo =~ s#//+#/#g;
		$foo =~ s#/+$##g;
		$foo =~ s#^/+##g;
		return $prefix.$foo if ($prefix);
		return $foo;
	}

	# read local filesystem
	my $di = 0;
	while ($di <= $#dirs && $real_bl) {
		my $d=$dirs[$di++];
		opendir(DIR,"$real_bl/$d") || warn "opendir($real_bl/$d): $!\n";

		# read .backupignore if exists
		if (-f "$real_bl/$d/.backupignore") {
			open(I,"$real_bl/$d/.backupignore");
			while(<I>) {
				chomp;
				push @ignore,norm_dir("$d/$_");
			}
			close(I);
#print STDERR "ignore: ",join("|",@ignore),"\n";
			link "$real_bl/$d/.backupignore","$bc/$d/.backupignore" ||
				warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
		}

		# read .md5sum if exists
		if (-f "$real_bl/$d/.md5sum") {
			open(I,"$real_bl/$d/.md5sum");
			while(<I>) {
				chomp;
				my ($md5,$f) = split(/\s+/,$_,2);
				$file_md5{$f}=$md5;
			}
			close(I);
		}

		my @clutter = readdir(DIR);
		foreach my $f (@clutter) {
			next if ($f eq '.');
			next if ($f eq '..');
			my $pr = norm_dir("$d/$f");	# path relative
			my $pf = norm_dir("$d/$f","$real_bl/");	# path full
			if (grep(/^\Q$pr\E$/,@ignore) == 0) {
				if (-f $pf) {
					push @files,$pr;
					$file_size{$pr}=(stat($pf))[7];
					$file_atime{$pr}=(stat($pf))[8];
					$file_mtime{$pr}=(stat($pf))[9];
				} elsif (-d $pf) {
					push @dirs,$pr;
				} else {
					print STDERR "unknown type: $pf\n";
				}
			} else {
				print STDERR "ignored: $pr\n";
			}
		}
	}

	xlog($share,($#files+1)." files and ".($#dirs+1)." dirs on local disk before backup");

	# read smb filesystem

	xlog($share,"smb to $share as $param{username}/$param{workgroup}");

	# FIX: how to aviod creation of ~/.smb/smb.conf ?
	$smb = new Filesys::SmbClient(%param) || die "SmbClient :$!\n";

	$di = 0;
	while ($di <= $#smb_dirs) {
		my $d=$smb_dirs[$di];
		my $pf = norm_dir($d,"smb:$share/");	# path full
		my $D = $smb->opendir($pf);
		if (! $D) {
			xlog($share,"FATAL: $share: $!");
			# remove failing dir
			delete $smb_dirs[$di];
			next;
		}
		$di++;

		my @clutter = $smb->readdir_struct($D);
		foreach my $item (@clutter) {
			my $f = $item->[1];
			next if ($f eq '.');
			next if ($f eq '..');
			my $pr = norm_dir("$d/$f");	# path relative
			my $pf = norm_dir("$d/$f","smb:$share/"); # path full
			if (grep(/^\Q$pr\E$/,@ignore) == 0) {
				if ($item->[0] == main::SMBC_FILE) {
					push @smb_files,$pr;
					$smb_size{$pr}=($smb->stat($pf))[7];
					$smb_atime{$pr}=($smb->stat($pf))[10];
					$smb_mtime{$pr}=($smb->stat($pf))[11];
				} elsif ($item->[0] == main::SMBC_DIR) {
					push @smb_dirs,$pr;
				} else {
					print STDERR "unknown type: $pf\n";
				}
			} else {
				print STDERR "smb ignored: $pr\n";
			}
		}
	}

	xlog($share,($#smb_files+1)." files and ".($#smb_dirs+1)." dirs on remote share");

	# sync dirs
	my $lc = List::Compare->new(\@dirs, \@smb_dirs);

	my @dirs2erase = $lc->get_Lonly;
	my @dirs2create = $lc->get_Ronly;
	xlog($share,($#dirs2erase+1)." dirs to erase and ".($#dirs2create+1)." dirs to create");

	# create new dirs
	foreach (sort @smb_dirs) {
		mkdir "$bc/$_" || warn "mkdir $_: $!\n";
	}

	# sync files
	$lc = List::Compare->new(\@files, \@smb_files);

	my @files2erase = $lc->get_Lonly;
	my @files2create = $lc->get_Ronly;
	xlog($share,($#files2erase+1)." files to erase and ".($#files2create+1)." files to create");

	sub smb_copy {
		my $smb = shift;

		my $from = shift;
		my $to = shift;


		my $l = 0;
		
		foreach my $f (@_) {
#print "smb_copy $from/$f -> $to/$f\n";
			if (! open(F,"> $to/$f")) {
				print STDERR "can't open new file $to/$f: $!\n";
				next;
			}

			my $md5 = Digest::MD5->new;

			my $fd = $smb->open("$from/$f");
			if (! $fd) {
				print STDERR "can't open smb file $from/$f: $!\n";
				next;
			}

			while (defined(my $b=$smb->read($fd,4096))) {
				print F $b;
				$l += length($b);
				$md5->add($b);
			}

			$smb->close($fd);
			close(F);

			$file_md5{$f} = $md5->hexdigest;

			# FIX: this fails with -T
			my ($a,$m) = ($smb->stat("$from/$f"))[10,11];
			utime $a, $m, "$to/$f" ||
				warn "can't update utime on $to/$f: $!\n";

		}
		return $l;
	}

	# copy new files
	foreach (@files2create) {
		$transfer += smb_copy($smb,"smb:$share",$bc,$_);
	}

	my $size_sync = 0;
	my $atime_sync = 0;
	my $mtime_sync = 0;
	my @sync_files;
	my @ln_files;

	foreach ($lc->get_intersection) {

		my $f;

		if ($file_size{$_} != $smb_size{$_}) {
			$f=$_;
			$size_sync++;
		}
		if ($file_atime{$_} != $smb_atime{$_}) {
			$f=$_;
			$atime_sync++;
		}
		if ($file_mtime{$_} != $smb_mtime{$_}) {
			$f=$_;
			$mtime_sync++;
		}

		if ($f) {
			push @sync_files, $f;
		} else {
			push @ln_files, $_;
		}
	}

	xlog($share,($#sync_files+1)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".($#ln_files+1)." will be linked.");

	foreach (@sync_files) {
		$transfer += smb_copy($smb,"smb:$share",$bc,$_);
	}

	xlog($share,"$transfer bytes transfered...");

	foreach (@ln_files) {
		link "$real_bl/$_","$bc/$_" || warn "link $real_bl/$_ -> $bc/$_: $!\n";
	}

	# remove files
	foreach (sort @files2erase) {
		unlink "$bc/$_" || warn "unlink $_: $!\n";
	}

	# remove not needed dirs (after files)
	foreach (sort @dirs2erase) {
		rmdir "$bc/$_" || warn "rmdir $_: $!\n";
	}

	# remove old .md5sum
	foreach (sort @dirs) {
		unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum");
	}

	# create .md5sum
	my $last_dir = '';
	my $md5;
	foreach my $f (sort { $file_md5{$a} cmp $file_md5{$b} } keys %file_md5) {
		my $dir = dirname($f);
		my $file = basename($f);
#print "$f -- $dir / $file<--\n";
		if ($dir ne $last_dir) {
			close($md5) if ($md5);
			open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!";
			$last_dir = $dir;
#print STDERR "writing $last_dir/.md5sum\n";
		}
		print $md5 $file_md5{$f},"  $file\n";
	}
	close($md5);

	# create leatest link
#print "ln -s $bc $real_bl\n";
	if (-l $bl) {
		unlink $bl || warn "can't remove old latest symlink $bl: $!\n";
	}
	symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n";

	# FIX: sanity check -- remove for speedup
	xlog($share,"failed to create latest symlink $bl -> $bc...") if (readlink($bl) ne $bc || ! -l $bl);

	xlog($share,"backup completed...");
}

__END__
#-------------------------------------------------------------------------


=head1 NAME

psinib - Perl Snapshot Is Not Incremental Backup

=head1 SYNOPSIS

./psinib.pl

=head1 DESCRIPTION

This script in current version support just backup of Samba (or Micro$oft
Winblowz) shares to central disk space. Central disk space is organized in
multiple directories named after:

=over 4

=item *
server which is sharing files to be backed up

=item *
name of share on server

=item *
dated directory named like standard ISO date format (YYYYMMDD).

=back

In each dated directory you will find I<snapshot> of all files on
exported share on that particular date.

You can also use symlink I<latest> which will lead you to
last completed backup. After that you can use some other backup
software to transfer I<snapshot> to tape, CD-ROM or some other media.

=head2 Design considerations

Since taking of share snapshot every day requires a lot of disk space and
network bandwidth, B<psinib> uses several techniques to keep disk usage and
network traffic at acceptable level:

=over 3

=item - usage of hard-links to provide same files in each snapshot (as opposed
to have multiple copies of same file)

=item - usage of file size, atime and mtime to find changes of files without
transferring whole file over network (just share browsing is transfered
over network)

=item - usage of C<.md5sum> files (compatible with command-line utility
C<md5sum>) to keep file between snapshots hard-linked

=back

=head1 CONFIGURATION

This section is not yet written.

=head1 HACKS, TRICKS, BUGS and LIMITATIONS

This chapter will have all content that doesn't fit anywhere else.

=head2 Can snapshots be more frequent than daily?

There is not real reason why you can't take snapshot more often than
once a day. Actually, if you are using B<psinib> to backup Windows
workstations you already know that they tend to come-and-go during the day
(reboots probably ;-), so running B<psinib> several times a day increases
your chance of having up-to-date backup (B<psinib> will not make multiple
snapshots for same day, nor will it update snapshot for current day if
it already exists).

However, changing B<psinib> to produce snapshots which are, for example, hourly
is a simple change of C<$DIR_TIME_FMT> which is currently set to
C<'%Y%m%d'> (see I<strftime> documentation for explanation of that 
format). If you change that to C<'%Y%m%d-%H> you can have hourly snapshots
(if your network is fast enough, that is...). Also, some of messages in
program will sound strange, but other than that it should work.
I<You have been warned>.

=head2 Do I really need to share every directory which I want to snapshot?

Actually, no. Due to usage of C<Filesys::SmbClient> module, you can also
specify sub-directory inside your share that you want to backup. This feature
is most useful if you want to use administrative shares (but, have in mind
that you have to enter your Win administrator password in unencrypted file on
disk to do that) like this:

	smbmount //server/c$/WinNT/fonts  /mnt  -o username=administrator%win  

After that you will get directories with snapshots like:

	server/c_WinNT_fonts/yyyymmdd/....

=head2 Won't I run out of disk space?

Of course you will... Snapshots and logfiles will eventually fill-up your disk.
However, you can do two things to stop that:

=head3 Clean snapshort older than x days

You can add following command to your C<root> crontab:

	find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;

I assume that C</backup/isis_backup> is directory in which are your snapshots
and that you don't want to keep snapshots older than 11 days (that's
C<-mtime +11> part of command).

=head3 Rotate your logs

I will leave that to you. I relay on GNU/Debian's C<logrotate> to do it for me.

=head2 What are I<YYYYMMDD.partial> directories?

If there isn't I<latest> symlink in snapshot directory, it's preatty safe to
assume that previous backup from that day failed. So, that directory will
be renamed to I<YYYYMMDD.partial> and snapshot will be performed again,
linking same files (other alternative would be to erase that dir and find
second-oldest directory, but this seemed like more correct approach).

=head1 AUTHOR

Dobrica Pavlinusic <dpavlin@rot13.org>

L<http://www.rot13.org/~dpavlin/>

=head1 LICENSE

This product is licensed under GNU Public License (GPL) v2 or later.

=cut
