/*
 * integrate swish-e into PostgreSQL
 *
 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
 *
 * TODO:
 * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
 * - support composite type arguments
 * - split error_or_abort
 * - use getResultPropValue not SwishResultPropertyStr
 *
 * NOTES:
 * - clear structures with memset to support hash indexes (who would like
 *   to create hash index on table returned from function?)
 * - number of returned rows is set by PostgreSQL evaluator, see:
 *   http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
 *
 * Based on:
 * - C example from PostgreSQL documentation (BSD licence)
 * - swish-e example src/libtest.c (GPL)
 * - _textin/_textout from pgcurl.c (LGPL)
 *
 * This code is licenced under GPL
 */

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include <swish-e.h>

/*
 * Conversion helpers between C strings and PostgreSQL text values. Each one
 * invokes the built-in textin/textout function through DirectFunctionCall1.
 *
 * _textin(str)   - pass a C string to textin; yields the resulting Datum
 * _textout(str)  - pass a text pointer to textout; yields the result as a
 *                  generic pointer
 * GET_STR(textp) - same call as _textout, result extracted as a C string
 * GET_TEXT(cstrp)- same call as _textin, result extracted as a text pointer
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))


PG_FUNCTION_INFO_V1(pgswish);
Datum pgswish(PG_FUNCTION_ARGS)
{
	ArrayType	*prop_arr = PG_GETARG_ARRAYTYPE_P(5);
	Oid		prop_element_type = ARR_ELEMTYPE(prop_arr);
	int		prop_ndims = ARR_NDIM(prop_arr);
	int		*prop_dim_counts = ARR_DIMS(prop_arr);
	int		*prop_dim_lower_bounds = ARR_LBOUND(prop_arr);
	int		ncols = 0;
	int		nrows = 0;
	int		indx[MAXDIM];
	int16		prop_len;
	bool		prop_byval;
	char		prop_align;
	ReturnSetInfo	*rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	AttInMetadata	*attinmeta;
	TupleDesc	tupdesc;
	Tuplestorestate *tupstore = NULL;
	HeapTuple	tuple;
	MemoryContext	per_query_ctx;
	MemoryContext	oldcontext;
	Datum		dvalue;
	char		**values;
	int		rsinfo_ncols;
	int		i, j;
	/* swish-e */
	SW_HANDLE	swish_handle = NULL;	/* Database handle */
	SW_SEARCH	search = NULL;	/* search handle -- search parameters */
	SW_RESULTS	swish_results = NULL; /* results handle -- list of results */
	SW_RESULT	*sw_res = NULL;	/* one row from swish-e results */
	FILE		*logfh;

	int ecode, *est_result, resnum;
	int limit = 0;
	int offset = 0;

	char		*index_path;
	char		*query;
	char		*attr;


	/* only allow 1D input array */
	if (prop_ndims == 1)
	{
		ncols = prop_dim_counts[0];
	}
	else
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid input array"),
				 errdetail("Input array must have 1 dimension")));
		
	/* check to see if caller supports us returning a tuplestore */
	if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("materialize mode required, but it is not " \
						"allowed in this context")));

	/* get info about element type needed to construct the array */
	get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);

	/* get the requested return tuple description */
	tupdesc = rsinfo->expectedDesc;
	rsinfo_ncols = tupdesc->natts;

	/*
	 * The requested tuple description better match up with the array
	 * we were given.
	 */
	if (rsinfo_ncols != ncols)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid input array"),
				 errdetail("Number of elements in array must match number of query specified columns.")));

	/* OK, use it */
	attinmeta = TupleDescGetAttInMetadata(tupdesc);

	/* Now go to work */
	rsinfo->returnMode = SFRM_Materialize;

	per_query_ctx = fcinfo->flinfo->fn_mcxt;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* initialize our tuplestore */
	tupstore = tuplestore_begin_heap(true, false, SortMem);


	/* take rest of arguments from function */

	/* index path */
	if (PG_ARGISNULL(0)) {
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("index path can't be null"),
				 errdetail("Index path must be valid full path to HyperEstraier index")));
	}
	index_path = _textout(PG_GETARG_TEXT_P(0));

	/* query string */
	if (PG_ARGISNULL(0)) {
		query = "";
	} else {
		query = _textout(PG_GETARG_TEXT_P(1));
	}

	/* atribute filter */
	if (PG_ARGISNULL(2)) {
		attr = "";
	} else {
		attr = _textout(PG_GETARG_TEXT_P(2));
	}

	/* limit */
	if (PG_ARGISNULL(3)) {
		limit = 0;
	} else {
		limit = PG_GETARG_INT32(3);
	}

	/* offset */
	if (PG_ARGISNULL(4)) {
		offset = 0;
	} else {
		offset = PG_GETARG_INT32(4);
	}


	/* Send any errors or warnings to log, as well as
	 * STDOUT and STDERR (just to be sure) */
	if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
		set_error_handle( logfh );
		elog(INFO, "loggin swish-e errors to /tmp/pgswish.log");
		/* redirect STDOUT and STDERR to log */
		dup2(1, logfh);
		dup2(2, logfh);
	} else {
		elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
	}

	elog(INFO, "pgswish: SwishInit(%s)", index_path);

	swish_handle = SwishInit( index_path );

	if ( SwishError( swish_handle ) || ! swish_handle )
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			errmsg("pgswish: SwishInit(%s) failed", index_path ),
			errdetail( SwishErrorString( swish_handle ) )
		));

	elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);


	/* set ranking scheme. default is 0 */
	SwishRankScheme( swish_handle, 0 );
	error_or_abort( swish_handle );

	elog(INFO, "pgswish: SwishQuery(%s)", query);
	/* Here's a short-cut to searching that creates a search object
	 * and searches at the same time */
	
	/* set the search phrase to the search condition object */
	if (! PG_ARGISNULL(1) && strlen(query) > 0)
		swish_results = SwishQuery( swish_handle, query);
	error_or_abort( swish_handle );

	/* total number of tuples to be returned */
	resnum = SwishHits( swish_results );

	/* FIXME */
	if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
		elog(DEBUG1,"ignored: %s", attr);
	}

	/* check if results exists */
	if ( 0 == resnum ) {
		elog(INFO, "pgswish: no results for: %s", query );
	}

	/* total number of tuples to be returned */
	if (limit && limit < resnum) {
		nrows = limit - offset;
	} else {
		nrows = resnum - offset;
	}


	elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);


	values = (char **) palloc(ncols * sizeof(char *));

	for (i = 0; i < nrows; i++)
	{

		/* get result from estraier */
		if (! ( SwishErrorString( swish_handle ) ) ) {
			elog(INFO, "can't find result %d", i + offset);
		} else {
			elog(INFO, "Path: %s\n  Rank: %lu\n  Size: %lu\n  Title: %s\n  Index: %s\n  Modified: %s\n  Record #: %lu\n  File   #: %lu\n\n",
				SwishResultPropertyStr   ( sw_res, "swishdocpath" ),
				SwishResultPropertyULong ( sw_res, "swishrank" ),
				SwishResultPropertyULong ( sw_res, "swishdocsize" ),
				SwishResultPropertyStr   ( sw_res, "swishtitle"),
				SwishResultPropertyStr   ( sw_res, "swishdbfile" ),
				SwishResultPropertyStr   ( sw_res, "swishlastmodified" ),
				SwishResultPropertyULong ( sw_res, "swishreccount" ),  /* can figure this out in loop, of course */
				SwishResultPropertyULong ( sw_res, "swishfilenum" )
			);
		}

		/* iterate over results */
		for (j = 0; j < ncols; j++)
		{
			bool	isnull;

			/* array value of this position */
			indx[0] = j + prop_dim_lower_bounds[0];

			dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);

			if (!isnull && sw_res)
				values[j] = DatumGetCString(
					prop2text( sw_res,
						(char *)DirectFunctionCall1(textout, dvalue)
					));
			else
				values[j] = NULL;
		}
		/* construct the tuple */
		tuple = BuildTupleFromCStrings(attinmeta, values);

		/* now store it */
		tuplestore_puttuple(tupstore, tuple);

	}

	tuplestore_donestoring(tupstore);
	rsinfo->setResult = tupstore;

	/*
	 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
	 * tuples are in our tuplestore and passed back through
	 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
	 * that we actually used to build our tuples with, so the caller can
	 * verify we did what it was expecting.
	 */
	rsinfo->setDesc = tupdesc;
	MemoryContextSwitchTo(oldcontext);

	/* free swish object and close */
	Free_Search_Object( search );
	SwishClose( swish_handle );

	return (Datum) 0;
}


/*
 * Fetch a swish-e result property as a string and return a palloc'd,
 * NUL-terminated copy of it.
 *
 * sw_res   - the swish-e result to read from
 * propname - name of the property to fetch
 *
 * Returns NULL when sw_res or propname is NULL, or when swish-e hands back
 * no string at all. Otherwise the caller owns the returned buffer.
 *
 * FIXME: swish-e errors are not checked here because the swish handle is
 * not available in this function.
 */
char *prop2text(SW_RESULT sw_res, char *propname) {
	const char *prop;
	char *val;
	int len;

	/* nothing sensible can be looked up without both of these */
	if ( !sw_res || !propname )
		return NULL;

	elog(DEBUG2, "prop2text(%s)", propname);

	prop = SwishResultPropertyStr( sw_res, propname );
	if ( !prop )
		return NULL;

	len = strlen(prop);
	elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);

	/* room for the terminating NUL */
	len++;

	elog(DEBUG2, "palloc(%d)", len);

	/* private copy, so the value does not depend on swish-e's own buffer */
	val = palloc(len);
	memcpy(val, prop, len);

	elog(DEBUG2, "val=%s", val);

	return val;
}

/*
 * Read an unsigned long swish-e property from a result and return it
 * formatted as a decimal string in a freshly palloc'd, zero-filled buffer.
 *
 * sw_res   - the swish-e result to read from
 * propname - name of the property to fetch
 *
 * No swish-e error check is done here (the swish handle is not available).
 */
char *prop2int(SW_RESULT sw_res, char *propname) {
	int bufsize = 128 * sizeof(char);
	unsigned long number;
	char *buf;

	elog(DEBUG2, "prop2int(%s)", propname);

	number = SwishResultPropertyULong( sw_res, propname );
	elog(DEBUG1, "prop2int(%s) = %lu", propname, number);

	elog(DEBUG2, "palloc(%d)", bufsize);
	buf = palloc(bufsize);
	memset(buf, 0, bufsize);

	/* fixed-size buffer, bounded write */
	snprintf(buf, bufsize, "%lu", number);
	elog(DEBUG2, "val=%s", buf);

	return buf;
}


/*
 * Check whether swish-e has flagged an error on the handle and, if so,
 * report it through PostgreSQL.
 *
 * Returns 0 when no error is pending. When an error is pending, the error
 * number and texts are copied, the swish handle is closed, and
 * ereport(ERROR) is raised. Raising ERROR does not come back to the caller,
 * so the handle must be closed before the report, not after it. The final
 * "return 1" is only there to satisfy the compiler.
 *
 * Only the handle passed in is released here. A results or search object
 * created by the caller is not visible from this function.
 */
static int error_or_abort( SW_HANDLE swish_handle ) {
	int		err_number;
	const char	*raw;
	char		*err_type;
	char		*err_detail;

	err_number = SwishError( swish_handle );
	if ( !err_number )
		return 0;

	/* take private copies; the handle is closed before they are used */
	raw = SwishErrorString( swish_handle );
	err_type = pstrdup( raw ? raw : "" );

	raw = SwishLastErrorMsg( swish_handle );
	err_detail = pstrdup( raw ? raw : "" );

	SwishClose( swish_handle );

	/* "%s" keeps library text from being interpreted as a format string */
	ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
		errmsg("pgswish error: Number [%d], Type [%s]",
			err_number,
			err_type
		),
		errdetail( "%s", err_detail )
	));

	return 1;
}

