#!/usr/bin/perl -w
# File:		sap.monitor
# Author:	Dobrica Pavlinusic, dpavlin@rot13.org
#		http://www.rot13.org/~dpavlin/sysadm.html
# Description:	monitor sap servers using sapinfo from RFCSDK
#
# Usage: sap.monitor [-[hH] ashost only/ignore] [-[sS] sysnr only/ignore]
#
# e.g.	sap.monitor -s 20	will scan only hosts whose sysnr matches 20
#	sap.monitor -S 20	will scan only hosts whose sysnr does not match 20
#	(option values are used as regular expressions)

# configuration file in /usr/local/etc/sap-mon.conf describes which
# hosts (ashost) and systems (sysnr) you want to check.
#
# format of line is:
#
# ashost [tab|space] sysnr # optional comment
#
# you can specify the host as a hostname (sap01) or with sap routers
# in-between to test the routers too (/H/saprtr/H/sap01)

use strict;
use Getopt::Std;

# change paths here if you want to
my $CONFIG = "/usr/local/etc/sap-mon.conf";
my $SAPINFO = "/usr/local/bin/sapinfo";
# number of times sapinfo is run for one host before the host is
# reported as failed
my $repeat = 3;
# seconds to wait between retries
my $repeat_wait = 5;
# seconds after which a single sapinfo run is abandoned
my $sapinfo_timeout = 10;

# Command line switches, all of which take an argument:
#   -h / -s   check only entries whose ashost / sysnr matches the argument
#   -H / -S   skip entries whose ashost / sysnr matches the argument
# getopt() (unlike getopts()) treats every listed character as a switch
# that takes an argument, so the list must not contain ':' separators.
my %opts;
getopt('hsHS', \%opts);

# Slurp the configuration file; every element of @config is one raw line
# (newline included).  The three-argument open with a lexical handle
# keeps a strange $CONFIG value from being interpreted as an open mode.
my @config;
open(my $config_fh, '<', $CONFIG) or die "can't open $CONFIG: $!";
@config = <$config_fh>;
close($config_fh);

# results collected while checking the hosts
my @failed;		# "ashost (sysnr)" of every system that did not answer
my @ok;			# "ashost (system id)" of every system that answered
my $fail_msg = "";	# sapinfo output of the failed checks

# sapinfo leaves trace files, so create dir without write permission
# and chdir to it!
# "or" is required here: "||" would bind to the last argument and the
# die could never be reached.
mkdir("/tmp/sap$$", 0555)	or die "can't make /tmp/sap$$: $!";
chdir("/tmp/sap$$")		or die "can't chdir in /tmp/sap$$: $!";

# Check every configured system that passes the command line filters.
#
# Each configuration line is "ashost sysnr # optional comment".  For a
# selected system sapinfo is run up to $repeat times; the system counts
# as alive as soon as the output contains a "System ID <id>" line.
# Results go to @ok / @failed, the output of the last attempt of a
# failed system is appended to $fail_msg.

# true if the given switch was supplied with a usable value; "0" is a
# valid value, an empty string is not (an empty pattern would make the
# match operator re-use the last successful pattern)
my $has_filter = sub {
	my ($switch) = @_;
	return defined $opts{$switch} && length $opts{$switch};
};

foreach my $entry (@config) {
	# work on a copy so the raw configuration lines stay untouched
	(my $line = $entry) =~ s/#.*//;	# nuke comments, even a bare "#"

	# split ' ' ignores leading whitespace and the trailing newline, so
	# indented lines are parsed and empty lines yield no fields at all
	my ($ashost, $sysnr) = split(' ', $line);
	next unless defined $ashost && defined $sysnr;

	# -h/-s select matching entries, -H/-S reject matching entries
	next if $has_filter->('h') && $ashost !~ m/$opts{h}/;
	next if $has_filter->('s') && $sysnr  !~ m/$opts{s}/;
	next if $has_filter->('H') && $ashost =~ m/$opts{H}/;
	next if $has_filter->('S') && $sysnr  =~ m/$opts{S}/;

	my $sys_id;
	my $output = "";
	foreach my $attempt (1 .. $repeat) {
		# start clean so a timeout does not report stale output of
		# an earlier attempt
		$output = "";
		eval {
			local $SIG{ALRM} = sub { die "timeout\n"; };
			alarm $sapinfo_timeout; # wait for sapinfo to finish
			# NOTE(review): $ashost and $sysnr are interpolated into
			# a shell command line -- the config file must be trusted
			$output = `$SAPINFO trace=0 ashost=$ashost sysnr=$sysnr`;
			alarm 0; # cancel while our handler is still installed
		};
		my $error = $@;
		alarm 0; # turn alarm off if eval was left some other way
		$output = "" if (! defined $output);
		if ($error) {
			# keep the reason (e.g. "timeout") for the failure report
			chomp $error;
			$output .= "sapinfo for $ashost ($sysnr) aborted: $error\n";
		}
		if ($output =~ m/System\s+ID\s+(\w+)/i) {
			$sys_id = $1;
			last;
		}
		# no point in waiting when there is no retry left
		sleep $repeat_wait if ($attempt < $repeat);
	}

	if (defined $sys_id) {
		push @ok, "$ashost ($sys_id)";
	} else {
		push @failed, "$ashost ($sysnr)";
		$fail_msg .= $output;
	}
}

# Report: the first line of output is the summary (failed systems, or
# "ALL OK"), followed by details.  Exit status is 1 if any system
# failed, 0 otherwise.
my $exit = 0;

if (@failed) {
	print join(", ",@failed)," FAILED\n\n";
	print "$fail_msg\n";
	$exit = 1;
} else {
	# claim "ALL OK" only when nothing failed
	print "ALL OK\n";
}

print "CHECKED HOSTS (which are OK): ",join(", ",@ok),"\n\n";

# Remove the scratch directory.  Leave it first, because removing the
# current working directory is not guaranteed to succeed everywhere.
# Problems are only warned about ("or", not "||", so the check is really
# evaluated): the exit status must keep reflecting the host checks.
chdir("/")		or warn "can't chdir to /: $!\n";
rmdir("/tmp/sap$$")	or warn "can't rmdir /tmp/sap$$: $!\n";

exit $exit;
