/*
 * integrate swish-e into PostgreSQL
 *
 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
 *
 * TODO:
 * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
 * - support composite type arguments
 * - split error_or_abort
 * - use getResultPropValue not SwishResultPropertyStr
 * - fix everything about pgswish_arr which is broken
 *
 * NOTES:
 * - clear structures with memset to support hash indexes (who would like
 *   to create a hash index on a table returned from a function?)
 * - number of returned rows is set by PostgreSQL evaluator, see:
 *   http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
 *
 * Based on:
 * - C example from PostgreSQL documentation (BSD licence)
 * - swish-e example src/libtest.c (GPL)
 * - _textin/_textout from pgcurl.c (LGPL)
 *
 * This code is licenced under GPL
 */

#include "postgres.h"

#include <unistd.h>

#include "fmgr.h"
#include "funcapi.h"
#include "utils/builtins.h"
#include "utils/array.h"
#include "miscadmin.h"
#include <swish-e.h>

/*
 * Helpers for converting between C strings and PostgreSQL text values.
 * All four go through the textin/textout functions via DirectFunctionCall1.
 */

/* C string -> Datum produced by textin */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
/* text pointer -> pointer to the C string produced by textout */
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
/* same conversion as _textout, but unwrapped with DatumGetCString */
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
/* same conversion as _textin, but unwrapped with DatumGetTextP */
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * Globals
 *
 * swish-e state shared by the SQL-callable functions and the helpers in
 * this file.  pgswish() relies on these surviving between the individual
 * calls of one set-returning scan.
 */
static SW_HANDLE   swish_handle = NULL;	/* Database handle */
static SW_SEARCH   swish_search = NULL;	/* search handle -- search parameters */
static SW_RESULTS  swish_results = NULL; /* results handle -- list of results */
/*
 * One row from swish-e results.  This holds the value returned by
 * SwishNextResult() and is passed to functions taking SW_RESULT, so it is
 * declared as SW_RESULT itself rather than as a pointer to one.
 */
static SW_RESULT   sw_res = NULL;

/*
 * SortMem got renamed in PostgreSQL 8.0; provide a fallback value when no
 * macro of that name exists.  The expansion is parenthesized so that it
 * behaves as a single value inside larger expressions (e.g. x / SortMem).
 */
#ifndef SortMem
 #define SortMem (16 * 1024)
#endif

/* prototypes */
char *prop2text(SW_RESULT sw_res, char *propname);
char *prop2int(SW_RESULT sw_res, char *propname);
void elog_swish( SW_HANDLE swish_handle );
/* declared here because it is called before its definition at the end of the file */
int error_or_abort( SW_HANDLE swish_handle );

/* define PostgreSQL v1 function */
PG_FUNCTION_INFO_V1(pgswish);
Datum pgswish(PG_FUNCTION_ARGS) {

	FuncCallContext	*funcctx;
	int		call_cntr;
	int		max_calls;
	TupleDesc	tupdesc;
	TupleTableSlot	*slot;
	AttInMetadata	*attinmeta;
	char		*index_path;
	char		*query;
	FILE		*logfh;

	/* stuff done only on the first call of the function */
	if (SRF_IS_FIRSTCALL()) {
		MemoryContext	oldcontext;

		/* take arguments from function */
		//index_path = _textout(PG_GETARG_TEXT_P(0));
		index_path = _textout(PG_GETARG_TEXT_P(0));
		query = _textout(PG_GETARG_TEXT_P(1));

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/* switch to memory context appropriate for multiple function calls */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		
		/* Send any errors or warnings to log, as well as
		 * STDOUT and STDERR (just to be sure) */
		if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
			set_error_handle( logfh );
			elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
			/* redirect STDOUT and STDERR to log */
			dup2(1, logfh);
			dup2(2, logfh);
		} else {
			elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
		}

		elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);

		swish_handle = SwishInit( index_path );

		if ( SwishError( swish_handle ) || ! swish_handle) {
			elog(ERROR, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle ));
		
			SRF_RETURN_DONE(funcctx);
		}
		
		if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
		/* set ranking scheme. default is 0 */
		SwishRankScheme( swish_handle, 0 );
		if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);

		elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
		/* Here's a short-cut to searching that creates a search object and searches at the same time */
		swish_results = SwishQuery( swish_handle, query);
		if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);

		/* total number of tuples to be returned */
		funcctx->max_calls = SwishHits( swish_results );

		/* check if results exists */
		if ( 0 == funcctx->max_calls )
			elog(INFO, "no results for: %s", query );

		elog(DEBUG1, "pgswish: SwishHits = %d", funcctx->max_calls);

		/* Build a tuple description for a __pgswish tuple */
		tupdesc = RelationNameGetTupleDesc("__pgswish");

		/* allocate a slot for a tuple with this tupdesc */
		slot = TupleDescGetSlot(tupdesc);

		/* assign slot to function context */
		funcctx->slot = slot;

		/*
		 * generate attribute metadata needed later to produce tuples from raw
		 * C strings
		 */
		attinmeta = TupleDescGetAttInMetadata(tupdesc);
		funcctx->attinmeta = attinmeta;

		MemoryContextSwitchTo(oldcontext);

		elog(DEBUG1, "SRF_IS_FIRSTCALL done");
	}

	/* stuff done on every call of the function */
	funcctx = SRF_PERCALL_SETUP();

	call_cntr = funcctx->call_cntr;
	max_calls = funcctx->max_calls;
	slot = funcctx->slot;
	attinmeta = funcctx->attinmeta;
 
	if (call_cntr < max_calls) {
		char		**values;
		HeapTuple	tuple;
		Datum		result;

		elog(DEBUG1, "pgswish: loop count %d", call_cntr);

		if (! swish_results) {
			elog(ERROR, "pgswish: no swish-e results");
			SRF_RETURN_DONE(funcctx);
		}
		
		elog(DEBUG1, "pgswish: check for swish-e error");
		if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);

		/*
		 * Prepare a values array for storage in our slot.
		 * This should be an array of C strings which will
		 * be processed later by the type input functions.
		 */

		sw_res = SwishNextResult( swish_results );
		if (! sw_res) {
			elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1);
			Free_Results_Object( swish_results );
			/* Free_Search_Object( swish_search ); */
			SRF_RETURN_DONE(funcctx);
		}
		
		elog(DEBUG1, "Path: %s\n  Rank: %lu\n  Size: %lu\n  Title: %s\n  Index: %s\n  Modified: %s\n  Record #: %lu\n  File   #: %lu\n\n",
			SwishResultPropertyStr   ( sw_res, "swishdocpath" ),
			SwishResultPropertyULong ( sw_res, "swishrank" ),
			SwishResultPropertyULong ( sw_res, "swishdocsize" ),
			SwishResultPropertyStr   ( sw_res, "swishtitle"),
			SwishResultPropertyStr   ( sw_res, "swishdbfile" ),
			SwishResultPropertyStr   ( sw_res, "swishlastmodified" ),
			SwishResultPropertyULong ( sw_res, "swishreccount" ),  /* can figure this out in loop, of course */
			SwishResultPropertyULong ( sw_res, "swishfilenum" )
		);

		values = (char **) palloc(4 * sizeof(char *));

		values[0] = (char *) prop2int( sw_res, "swishrank" );
		values[1] = (char *) prop2text( sw_res, "swishdocpath" );
		values[2] = (char *) prop2text( sw_res, "swishtitle" );
		values[3] = (char *) prop2int( sw_res, "swishdocsize" );

/*
		values[0] = (char *) palloc(16 * sizeof(char));
		snprintf(values[0], 16, "%d", 1);
		values[1] = (char *) palloc(16 * sizeof(char));
		snprintf(values[1], 16, "%d", 2);
		values[2] = (char *) palloc(16 * sizeof(char));
		snprintf(values[2], 16, "%d", 3);
		values[3] = (char *) palloc(16 * sizeof(char));
		snprintf(values[3], 16, "%d", 4);
*/

		/* build a tuple */
		tuple = BuildTupleFromCStrings(attinmeta, values);

		/* make the tuple into a datum */
		result = TupleGetDatum(slot, tuple);

		/* clean up ? */
		pfree(values[0]);
		pfree(values[1]);
		pfree(values[2]);
		pfree(values[3]);
		pfree(values);
		
		elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]);
	
		SRF_RETURN_NEXT(funcctx, result);
	} else {
		elog(DEBUG1, "loop over");

		/* free swish object and close */
		/* Free_Search_Object( swish_search ); */
		SwishClose( swish_handle );

		/* do when there is no more left */
		SRF_RETURN_DONE(funcctx);
	}
}


/*
 * new function with support for property selection
 */

PG_FUNCTION_INFO_V1(pgswish_arr);

/*
 * pgswish_arr(index_path, query, attr, limit, offset, props[])
 *
 * Materialize-mode set-returning function.  Opens the swish-e index at
 * index_path, runs query and returns one row per hit; column j of each row
 * is the string value of the swish-e property named by element j of props.
 *
 * Arguments:
 *   0: index path (text) -- must not be NULL
 *   1: query (text)      -- NULL or empty: no search is run, no rows returned
 *   2: attribute filter (text) -- currently only logged (see FIXME below)
 *   3: limit (int4)      -- NULL or <= 0 means "no limit"
 *   4: offset (int4)     -- NULL or < 0 means 0
 *   5: one-dimensional array of property names -- must not be NULL and must
 *      have as many elements as the caller's column definition list
 *
 * NOTE(review): array elements are converted with textout, so the array is
 * presumably expected to be text[]; the element type is not verified here.
 */
Datum pgswish_arr(PG_FUNCTION_ARGS)
{
	/*
	 * Log stream handed to swish-e.  Opened once per backend and kept open
	 * on purpose: swish-e keeps using the stream after this call returns.
	 */
	static FILE	*logfh = NULL;

	ArrayType	*prop_arr;
	Oid		prop_element_type;
	int		prop_ndims;
	int		*prop_dim_counts;
	int		*prop_dim_lower_bounds;
	int		ncols = 0;
	int		nrows = 0;
	int		indx[MAXDIM];
	int16		prop_len;
	bool		prop_byval;
	char		prop_align;
	ReturnSetInfo	*rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	AttInMetadata	*attinmeta;
	TupleDesc	tupdesc;
	Tuplestorestate *tupstore = NULL;
	HeapTuple	tuple;
	MemoryContext	per_query_ctx;
	MemoryContext	oldcontext;
	char		**propnames;
	char		**values;
	int		i, j;
	/* swish-e */
	int		resnum = 0;
	int		limit = 0;
	int		offset = 0;

	char		*index_path;
	char		*query;
	char		*attr;

	/* the property array must be present before it can be inspected */
	if (PG_ARGISNULL(5))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid input array"),
				 errdetail("Property array can't be null")));

	prop_arr = PG_GETARG_ARRAYTYPE_P(5);
	prop_element_type = ARR_ELEMTYPE(prop_arr);
	prop_ndims = ARR_NDIM(prop_arr);
	prop_dim_counts = ARR_DIMS(prop_arr);
	prop_dim_lower_bounds = ARR_LBOUND(prop_arr);

	/* only allow 1D input array */
	if (prop_ndims != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid input array"),
				 errdetail("Input array must have 1 dimension")));
	ncols = prop_dim_counts[0];

	/* check to see if caller supports us returning a tuplestore */
	if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("materialize mode required, but it is not " \
						"allowed in this context")));

	/* get info about element type needed to read the array */
	get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);

	/* get the requested return tuple description */
	tupdesc = rsinfo->expectedDesc;
	if (tupdesc == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("pgswish: a column definition list is required")));

	/*
	 * The requested tuple description better match up with the array
	 * we were given.
	 */
	if (tupdesc->natts != ncols)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid input array"),
				 errdetail("Number of elements in array must match number of query specified columns.")));

	/* take rest of arguments from function */

	/* index path */
	if (PG_ARGISNULL(0)) {
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("index path can't be null"),
				 errdetail("Index path must be valid full path to swish-e index")));
	}
	index_path = _textout(PG_GETARG_TEXT_P(0));

	/* query string (argument 1) */
	if (PG_ARGISNULL(1)) {
		query = "";
	} else {
		query = _textout(PG_GETARG_TEXT_P(1));
	}

	/* atribute filter */
	if (PG_ARGISNULL(2)) {
		attr = "";
	} else {
		attr = _textout(PG_GETARG_TEXT_P(2));
	}

	/* limit */
	if (!PG_ARGISNULL(3))
		limit = PG_GETARG_INT32(3);

	/* offset; a negative offset makes no sense, treat it as 0 */
	if (!PG_ARGISNULL(4))
		offset = PG_GETARG_INT32(4);
	if (offset < 0)
		offset = 0;

	/* OK, use it */
	attinmeta = TupleDescGetAttInMetadata(tupdesc);

	/* Now go to work */
	rsinfo->returnMode = SFRM_Materialize;

	/* NOTE(review): fn_mcxt is used as the long-lived context here -- confirm */
	per_query_ctx = fcinfo->flinfo->fn_mcxt;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* initialize our tuplestore */
	tupstore = tuplestore_begin_heap(true, false, SortMem);

	/* Send any errors or warnings to log, as well as
	 * STDOUT and STDERR (just to be sure) */
	if (logfh == NULL) {
		logfh = fopen("/tmp/pgswish.log", "a");
		if (logfh != NULL) {
			int	logfd = fileno(logfh);

			set_error_handle( logfh );
			elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
			/*
			 * redirect STDOUT and STDERR to log: dup2(oldfd, newfd) makes
			 * newfd refer to the same file as oldfd.
			 * NOTE(review): this also diverts anything else this backend
			 * writes to stdout/stderr -- confirm that is wanted.
			 */
			if (dup2(logfd, STDOUT_FILENO) < 0 || dup2(logfd, STDERR_FILENO) < 0)
				elog(WARNING, "pgswish: can't redirect STDOUT/STDERR to /tmp/pgswish.log");
		} else {
			elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
		}
	}

	elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);

	/* never carry search state over from an earlier call */
	swish_results = NULL;
	swish_search = NULL;

	swish_handle = SwishInit( index_path );

	/* test for a missing handle before asking it for its error state */
	if ( swish_handle == NULL )
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			errmsg("pgswish: SwishInit(%s) failed", index_path )
		));

	if ( SwishError( swish_handle ) ) {
		/*
		 * ereport(ERROR) does not return, so copy the message and close
		 * the handle before raising the error.
		 */
		char	*reason = SwishErrorString( swish_handle );
		char	*errstr = pstrdup( reason ? reason : "" );

		SwishClose( swish_handle );
		swish_handle = NULL;
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			errmsg("pgswish: SwishInit(%s) failed", index_path ),
			errdetail("%s", errstr)
		));
	}

	elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);

	/* set ranking scheme. default is 0 */
	SwishRankScheme( swish_handle, 0 );
	error_or_abort( swish_handle );

	elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
	elog_swish( swish_handle );

	/* set the search phrase to the search condition object and run it */
	if (! PG_ARGISNULL(1) && query[0] != '\0') {
		elog(DEBUG1, "swish_handle: %p", (void *) swish_handle);
		swish_search = New_Search_Object( swish_handle, query );
		elog(DEBUG1, "swish_search: %p", (void *) swish_search);

		error_or_abort( swish_handle );
		elog_swish( swish_handle );

		elog(DEBUG1, "swishExecute: %s", query );
		swish_results = SwishExecute( swish_search, query );
		error_or_abort( swish_handle );
		elog_swish( swish_handle );
	}

	/* total number of hits; without a results object there are none */
	if (swish_results)
		resnum = SwishHits( swish_results );
	if (resnum < 0)
		resnum = 0;

	/* FIXME */
	if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
		elog(DEBUG1,"ignored: %s", attr);
	}

	/* check if results exists */
	if ( 0 == resnum ) {
		elog(INFO, "pgswish: no results for: %s", query );
	}

	/*
	 * number of tuples to be returned: whatever is left after skipping
	 * offset hits, capped by limit when a limit was given
	 */
	nrows = resnum - offset;
	if (nrows < 0)
		nrows = 0;
	if (limit > 0 && limit < nrows)
		nrows = limit;

	elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);

	/* property names do not change between rows: extract them once */
	propnames = (char **) palloc(ncols * sizeof(char *));
	for (j = 0; j < ncols; j++)
	{
		bool	isnull;
		Datum	dvalue;

		/* array value of this position */
		indx[0] = j + prop_dim_lower_bounds[0];

		dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);

		if (isnull)
			propnames[j] = NULL;
		else
			propnames[j] = DatumGetCString(DirectFunctionCall1(textout, dvalue));
	}

	values = (char **) palloc(ncols * sizeof(char *));

	/* position the result list once, then read consecutive results */
	if (nrows > 0) {
		SwishSeekResult( swish_results, offset );
		error_or_abort( swish_handle );
	}

	for (i = 0; i < nrows; i++)
	{
		/* get result from swish-e */
		sw_res = SwishNextResult( swish_results );
		if (! sw_res) {
			/* the list ended early; there is nothing more to return */
			elog(INFO, "can't find result %d", i + offset);
			break;
		}

		elog(DEBUG1, "Path: %s\n  Rank: %lu\n  Size: %lu\n  Title: %s\n  Index: %s\n  Modified: %s\n  Record #: %lu\n  File   #: %lu\n\n",
			SwishResultPropertyStr   ( sw_res, "swishdocpath" ),
			SwishResultPropertyULong ( sw_res, "swishrank" ),
			SwishResultPropertyULong ( sw_res, "swishdocsize" ),
			SwishResultPropertyStr   ( sw_res, "swishtitle"),
			SwishResultPropertyStr   ( sw_res, "swishdbfile" ),
			SwishResultPropertyStr   ( sw_res, "swishlastmodified" ),
			SwishResultPropertyULong ( sw_res, "swishreccount" ),  /* can figure this out in loop, of course */
			SwishResultPropertyULong ( sw_res, "swishfilenum" )
		);

		/* one C string per requested property; NULL yields an SQL NULL */
		for (j = 0; j < ncols; j++)
		{
			if (propnames[j])
				values[j] = prop2text( sw_res, propnames[j] );
			else
				values[j] = NULL;
		}

		/* construct the tuple */
		tuple = BuildTupleFromCStrings(attinmeta, values);

		/* now store it */
		tuplestore_puttuple(tupstore, tuple);

		/* the strings were converted into the tuple; release them */
		for (j = 0; j < ncols; j++)
		{
			if (values[j]) {
				pfree(values[j]);
				values[j] = NULL;
			}
		}
	}

	tuplestore_donestoring(tupstore);
	rsinfo->setResult = tupstore;

	/*
	 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
	 * tuples are in our tuplestore and passed back through
	 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
	 * that we actually used to build our tuples with, so the caller can
	 * verify we did what it was expecting.
	 */
	rsinfo->setDesc = tupdesc;
	MemoryContextSwitchTo(oldcontext);

	/* free swish objects and close: results, then search, then handle */
	if (swish_results) {
		Free_Results_Object( swish_results );
		swish_results = NULL;
	}
	if (swish_search) {
		Free_Search_Object( swish_search );
		swish_search = NULL;
	}
	SwishClose( swish_handle );
	swish_handle = NULL;

	return (Datum) 0;
}




/*
 * make text var from property
 *
 * Looks up the string property propname of the swish-e result sw_res and
 * returns a palloc'd, NUL-terminated copy of it.
 *
 * Returns NULL when error_or_abort() reports a swish-e error or when
 * swish-e hands back no string for the property.
 */
char *prop2text(SW_RESULT sw_res, char *propname) {
	char *val;
	char *prop;
	int len;

	elog(DEBUG2, "prop2text(%s)", propname);

	prop = SwishResultPropertyStr( sw_res, propname );
	if (error_or_abort( swish_handle )) return NULL;

	/* strlen(NULL) is undefined; no value means no text */
	if (prop == NULL) {
		elog(DEBUG1, "prop2text(%s) = NULL", propname);
		return NULL;
	}

	len = strlen(prop);
	elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);

	/* room for the terminating NUL */
	len++;

	elog(DEBUG2, "palloc(%d)", len);

	val = palloc(len);

	/* len covers the terminator, so the copy is always NUL-terminated */
	memcpy(val, prop, len);

	elog(DEBUG2, "val=%s", val);

	return val;
}

/*
 * make integer variable from property
 *
 * Reads the unsigned long property propname of the swish-e result sw_res
 * and returns it formatted as a decimal string in a palloc'd, zero-filled
 * 128 byte buffer.  Returns NULL when error_or_abort() reports an error.
 */
char *prop2int(SW_RESULT sw_res, char *propname) {
	int		bufsize = 128 * sizeof(char);
	unsigned long	number;
	char		*buf;

	elog(DEBUG2, "prop2int(%s)", propname);

	number = SwishResultPropertyULong( sw_res, propname );
	if (error_or_abort( swish_handle ))
		return NULL;

	elog(DEBUG1, "prop2int(%s) = %lu", propname, number);
	elog(DEBUG2, "palloc(%d)", bufsize);

	/* zero the buffer first, then print the number into it */
	buf = palloc(bufsize);
	memset(buf, 0, bufsize);
	snprintf(buf, bufsize, "%lu", number);

	elog(DEBUG2, "val=%s", buf);

	return buf;
}


/*
 * Report the swish-e status of swish_handle through elog.
 *
 * The message carries the error number, the error string and the last
 * error message of the handle.  It is logged at ERROR level when the error
 * number is non-zero and at DEBUG1 otherwise.
 */
void elog_swish( SW_HANDLE swish_handle ) {
	int	status = SwishError( swish_handle );
	int	level;

	if (status)
		level = ERROR;
	else
		level = DEBUG1;

	elog(level,
		"pgswish status: %d [%s]: %s\n",
		status,
		SwishErrorString( swish_handle ),
		SwishLastErrorMsg( swish_handle ));
}

/*
 * check if swish has returned error, and elog it.
 *
 * Returns 0 when the handle reports no error.  Otherwise the pending
 * results and search objects are freed, the handle is closed and the error
 * is raised with elog(ERROR).
 *
 * elog(ERROR) does not return to the caller, so all cleanup has to happen
 * before the message is raised; the message texts are copied first because
 * they are read from the handle that is about to be closed.  The final
 * "return 1" only keeps the non-zero-on-error contract that callers test.
 *
 * NOTE: the parameter hides the file-level swish_handle, so that global
 * cannot be reset from here; callers must not reuse the handle they passed.
 */
int error_or_abort( SW_HANDLE swish_handle ) {
	int	err;
	char	*reason;
	char	*detail;

	if ( swish_handle == NULL ) {
		elog(ERROR, "pgswish: invalid (NULL) swish-e handle");
		return 1;
	}

	err = SwishError( swish_handle );
	if ( !err )
		return 0;

	/* copy the messages while the handle is still open */
	reason = SwishErrorString( swish_handle );
	detail = SwishLastErrorMsg( swish_handle );
	reason = pstrdup( reason ? reason : "" );
	detail = pstrdup( detail ? detail : "" );

	/* release swish-e objects and forget them so nobody frees them twice */
	if ( swish_results ) {
		Free_Results_Object( swish_results );
		swish_results = NULL;
	}
	if ( swish_search ) {
		Free_Search_Object( swish_search );
		swish_search = NULL;
	}
	SwishClose( swish_handle );

	/* print a message */
	elog(ERROR, "pgswish status: %d [%s]: %s\n", err, reason, detail);

	return 1;
}

