/* DO NOT EDIT THIS FILE. EDIT THE ORIGINAL SOURCE FILES INSTEAD AND RUN make */
/*********************************************************************
Spade, a Snort preprocessor plugin to report unusual packets
Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
copyright (c) 2000,2001 by Silicon Defense (http://www.silicondefense.com/)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.  

Spade description:

SPADE, the Statistical Packet Anomaly Detection Engine, is a Snort
preprocessor plugin to report packets that are unusual for your network. 
Port scans and probes tend to be unusual, so this will tend to report them
(as well as some benign packets that are simply uncommon).

Spade's home page: http://www.silicondefense.com/spice/

Please send complaints, kudos, and especially improvements and bugfixes to
hoagland@SiliconDefense.com. This is a research project and would love to
have your feedback.  It is still under active development and may change at
any time.

This file (anomsensor_plug.c) is part of Spade v092200.1.  It contains all
the Snort- and sensor-specific code in Spade.
*********************************************************************/

/* Internal version control: $Id: */

/*#define LOG10 2.30258509299 */
#define LOG2 0.69314718056
 
#include "spp_anomsensor.h"
#include "rules.h"
#include "log.h"
#include <string.h>

/* the threshold at which anomalous events are reported; a negative value
   disables reporting (see the checks in SpadeInit and PreprocSpade) */
double report_anom_thres;

char *outfile; // the name of the output log file
char *statefile; // the name of the file to checkpoint to and recover from
int checkpoint_freq; // the frequency (in recorded packet counts) with which
                     // to checkpoint
int prob_mode; // the probability calculation mode

int as_debug= 0; // the bigger the number, the more debugging statements
                 // that are active
int parts=0,part=11; // if parts is 1, the part indicates which part section
                     // in record_packet should be run, overriding the
                     // probability mode; don't try to calculate anomaly scores
                     // in this case

int adapting=0; // is there an adaptation module active on this run?
int need_anom= 0; // does some module need the anomaly score calculated
double last_anom_score; // the anomaly score for this packet
int skip_packet;  // is this packet being skipped (not added to the tree)
time_t last_pkt_time=(time_t)0; // the time of the last packet added

int tot_packets=0; // the total number of packets added to the tree
                   // on this run
int recent_packets= 0; // the number of packets added since the count was
                       // last reset
int alert_count= 0; // the count of alerts sent about packets
int recent_alert_count= 0; // the count of alerts sent since the count was
                           // last reset

int pp_active= 0; // this is a count of how many modules have added
                  // themselves to the preprocessor list and will be calling
                  // record_maybe_skip()
int pp_run_on_pkt= 0; // this is how many have called record_maybe_skip() so
                      // far on this packet


/* globals used in the tree and memory management; they are defined here and
   (presumably) initialized by init_mem()/tree_init() or recover(), which
   SpadeInit calls -- TODO confirm against the tree code */
/* printable name for each feature, indexed by feature number */
const char *featurename[NUM_FEATURES]={"sip","dip","sport","dport"};
/* alternative six-feature name table, kept for reference: */
/*const char *featurename[NUM_FEATURES]={"sip","dip","sport","dport","ttl","win"};*/

mindex TNULL;
dmindex DMINDEXMASK;

/* block tables for the three node kinds */
treeroot **ROOT_M;
intnode **INT_M;
leafnode **LEAF_M;

/* free-list heads, one per node kind */
mindex root_freelist;
mindex int_freelist;
mindex leaf_freelist;

/* per-kind block sizing parameters */
unsigned char ROOT_BLOCK_BITS;
unsigned char INT_BLOCK_BITS;
unsigned char LEAF_BLOCK_BITS;
unsigned int MAX_ROOT_BLOCKS;
unsigned int MAX_INT_BLOCKS;
unsigned int MAX_LEAF_BLOCKS;
/* one tree index per feature */
mindex T[NUM_FEATURES];
/*************/

/* The most basic role of Spade is to add packets to a tree in a certain way
which will allow probabilities of various features to be calculated.  The
next most basic thing it does is to calculate anomaly scores based on this. 
Typically, when a certain score threshold is exceeded, snort alerts are
generated.  There are additional modules within the sensor (turned on by
config file lines) which do additional things like adapting the reporting
threshold and generating statistics. */
 
/* Plugin entry point: ties each Spade preprocessor keyword to the init
   function that should run when that keyword is specified.
   A call to this function needs to be added to plugbase.c somehow. */
void SetupSpade()
{
    /* one registration per config-file keyword */
    RegisterPreprocessor("spade", SpadeInit);
    RegisterPreprocessor("spade-homenet", SpadeHomenetInit);
    RegisterPreprocessor("spade-stats", SpadeStatInit);
    RegisterPreprocessor("spade-threshlearn", SpadeThreshlearnInit);
    RegisterPreprocessor("spade-adapt", SpadeAdaptInit);
    RegisterPreprocessor("spade-adapt2", SpadeAdapt2Init);
    RegisterPreprocessor("spade-adapt3", SpadeAdapt3Init);
    RegisterPreprocessor("spade-survey", SpadeSurveyInit);

    /* nothing more to do unless debugging output is switched on */
    if (!as_debug) {
        return;
    }
    printf("Preprocessor: Spade is setup...\n");
}



/*========================================================================*/
/*========================= Spade core routines ==========================*/
/*========================================================================*/

/* snort config file line:
	preprocessor spade: [ <anom-report-thresh> [ <state-file> [ <log-file> [ <prob-mode> [ <checkpoint-freq> ]]]]]
	where:
	  <anom-report-thresh> is the (initial) reporting threshold for
anomalous events, or a negative number to not report (default -1)
	  <state-file> is the name of the checkpoint and recovery file to record
to and startup from, or 0 not to checkpoint or recover (default spade.rcv)
	  <log-file> is the name of the file to log to, or '-' for stdout
(default '-')
	  <prob-mode> is the probability mode to run in (0 for bayes net with 4
features, 1 for full joint prob with 4 features, 2 for full joint with 3
features, or 3 for full joint with 2 features) (default 3)
	  <checkpoint-freq> is the frequency of checkpointing, in terms of tree
addition counts (default 50000)
*/

/* Spade core init function:
     counts itself in pp_active, parses the 'spade' config args, registers
     PreprocSpade as the per-packet function, restores state from the state
     file (or builds a fresh tree when that is disabled or fails), and hooks
     the exit/restart (or signal) handlers */
void SpadeInit(u_char *args)
{
    int recovered;

    pp_active++;

    /* parse the argument list from the rules file */
    ParseSpadeArgs(args);
    if (report_anom_thres >= 0) {
        /* a non-negative threshold means we report, so scores are needed */
        need_anom= 1;
    }

    /* Set the preprocessor function into the function list */
    AddFuncToPreprocList(PreprocSpade);

    /* a state file name of "0" means: do not try to recover */
    recovered= (strcmp(statefile,"0") != 0) && recover(statefile);
    if (!recovered) {
        init_mem();
        tree_init();
    } else if (as_debug) {
        printf("Recovered from file %s\n",statefile);
    }

#ifndef OLD_SNORT
    // requires snort 1.6.1-beta3 or later
    AddFuncToCleanExitList(SpadeCatchSig,NULL);
    AddFuncToRestartList(SpadeCatchSig,NULL);
#else
    // use this if above won't compile
    signal(SIGUSR1, CleanUpSpade);
    signal(SIGQUIT, CleanUpSpade);
    signal(SIGHUP, CleanUpSpade);
#endif

    if (as_debug) {
        printf("Preprocessor: Spade Initialized\n");
    }
}

/* Spade 'spade' argument parsing function  */
void ParseSpadeArgs(char *args)
{
    char **toks;
    int numToks;

    toks = mSplit(args, " ", 20, &numToks, '\\');
   
	if (numToks > 0) {
		report_anom_thres = atof(toks[0]);
	} else {
		report_anom_thres= -1;
	}
	if (as_debug) printf("anomaly reporting threshold is %f\n",report_anom_thres);
	if (numToks > 1) {
		statefile = toks[1];
	} else {
		statefile= "spade.rcv";
	}
	if (as_debug) printf("state file is %s\n",statefile);
	if (numToks > 2) {
    	outfile = toks[2];
    } else {
    	outfile= "-";
    }
	if (as_debug) printf("output file is %s\n",outfile);
	if (numToks > 3) {
    	prob_mode = atoi(toks[3]);
    	if (prob_mode > 3 || prob_mode < 0) {
    		ErrorMessage("Warning: spp_anomsensor probabity mode #%d undefined, using #3 instead",prob_mode);
    		prob_mode= 3;
    	}
    } else {
    	prob_mode= 3;
    }
	if (as_debug) printf("probability mode is %d\n",prob_mode);
	if (numToks > 4) {
    	checkpoint_freq= atoi(toks[4]);
    } else {
    	checkpoint_freq= 50000;
    }
	if (as_debug) printf("checkpoint frequency is %d\n",checkpoint_freq);
}

/* Spade core routine that is called with each packet.
     Lets record_maybe_skip() process the packet; if the packet was accepted
     and reporting is enabled (threshold >= 0) and its anomaly score reaches
     the threshold, bumps the alert counters and raises an alert through
     AlertFunc.
     p: the packet handed to us by Snort */
void PreprocSpade(Packet *p)
{
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	
	if (report_anom_thres >= 0.0 && last_anom_score >= report_anom_thres) {
		char logMessage[65];
		alert_count++;
		recent_alert_count++;
		/* bounded write: "%.4f" of a very large score needs more room than
		   the buffer has, so let snprintf truncate rather than overflow */
		snprintf(logMessage,sizeof(logMessage),"spp_anomsensor: Anomaly threshold exceeded: %.4f",last_anom_score);
		(*AlertFunc)(p, logMessage);
	}
}



/*========================================================================*/
/*========================= SpadeHomenet module ==========================*/
/*========================================================================*/

/* This module makes only packets to certain networks be considered for the
anomaly sensor; list your most common networks first for increased
efficiency */

/* snort config file line:
	preprocessor spade-homenet: {<network>}
	where <network> is a network in CIDR notation (address/numbits)
	                   or an IP address */
														
ll_net *homelist= NULL;  // the only networks we should be looking at packets going to;
                         // filled in by SpadeHomenetInit from the spade-homenet line

/* Spade homenet init function:
     split the config args on spaces, drop a trailing whitespace-only token,
     and build the homenet list (homelist) from what remains; with debugging
     on, the resulting networks are printed.
     args: the text following "spade-homenet:" in the config file */
void SpadeHomenetInit(u_char *args)
{
    char **toks;
    int numToks;

	if (as_debug) printf("Preprocessor: SpadeHomenet Initialized\n");

    /* parse the argument list from the rules file */
    toks = mSplit(args, " ", 200, &numToks, '\\');
    /* Only look at the last token when there is one; with zero tokens
       toks[numToks-1] would read before the start of the array. */
    if (numToks > 0 && strspn(toks[numToks-1]," \t") == strlen(toks[numToks-1])) numToks--; /* last is just whitespace */
    /* with no tokens left, create_netlist returns NULL and homelist stays empty */
    homelist= create_netlist(toks,numToks);
    
    if (as_debug) {
    	ll_net *n;
   		struct in_addr net;
    	printf("SpadeHomenet nets are:\n");
    	for (n=homelist; n != NULL; n=n->next) {
    		net.s_addr= n->netaddr;
    		printf("\t%s with mask %lx\n",inet_ntoa(net),(u_long)ntohl(n->netmask));
    	}
    }
}

// create a linked list of network specifications (address and netmask) from
//  a array of strings representing an CIDR network spec or an IP address
ll_net *create_netlist(char *nets[],int count) {
	ll_net *prev=NULL,*head=NULL,*cur=NULL;
	int i;
    char **toks;
    int num_toks;
    int nmask;
    struct in_addr net;
	
	for (i=0; i < count; i++) {
		cur= (ll_net *)malloc(sizeof(ll_net));
		cur->next= NULL;
		if (i > 0) {
			prev->next= cur;
		} else {
			head= cur;
		}
		
		// this code based strongly on GenHomenet in snort.c
		/* break out the CIDR notation from the IP address */
	    toks = mSplit(nets[i],"/",2,&num_toks,0);

        /* convert the CIDR notation into a real live netmask */
	    if (num_toks < 2) {
	    	nmask= 32;
	    } else { 
	    	nmask = atoi(toks[1]);
	    }

        if ((nmask >= 0) && (nmask <= 32))
        {
            cur->netmask = netmasks[nmask];
        }
        else
        {
            FatalError("ERROR: Bad CIDR size [%d], 1 to 32 please!\n",
                       nmask);
        }

	    /* since PC's store things the "wrong" way, shuffle the bytes into 
	       the right order */
#ifndef WORDS_BIGENDIAN
	    cur->netmask = htonl(cur->netmask);
#endif

	    /* convert the IP addr into its 32-bit value */
	    if ((net.s_addr = inet_addr(toks[0])) ==-1)
	    {
	        FatalError("ERROR: network (%s) didn't translate with inet_addr, must be poorly formed\n",
	                   toks[0]);
	    }
	    else
	    {
	        cur->netaddr = ((u_long)net.s_addr & cur->netmask);
	    }

	    free(toks);
		
		prev= cur;
	}
	
	return head;
}

/*========================================================================*/
/*=========================== SpadeStat module ===========================*/
/*========================================================================*/

/* Whenever the CleanUpSpade is invoked, this module arranges for certain
   specified statistics to be written to the log file.  The available
   statistics depend on what is recorded in the tree, which depends on the
   probability measure used.  There is no good way to have more granularity
   at present.  You need to change the setting of the "parts" variable to 1
   and set the part variable to one of the parts in the record_packet
   routine (to which you might add a new part). */

/* snort config file line:
	preprocessor spade-stats: {<stat-option>}
	where <stat-option> is one of:
	  "entropy" (to display the known entropies and conditional entropies)
	  "uncondprob" (to display the known non-0 simple (joint) probabilities)
	  "condprob" (to display the known non-0 conditional (joint)
probabilities) */
														
/* vars to store what the stats module should report; each flag starts at 0
   and is set to 1 by SpadeStatInit when the matching option ("entropy",
   "condprob", "uncondprob") appears on the spade-stats config line */
int print_entropy= 0;
int print_condprob= 0;
int print_uncondprob= 0;

/* Spade Stat module init function:
     walks the space-separated options on the spade-stats line and switches
     on the matching print_* flag for each; an option that is not recognized
     is reported on stderr and otherwise ignored */
void SpadeStatInit(u_char *args)
{
    int ntok;
    int idx;
    char **tok;

    if (as_debug) {
        printf("Preprocessor: SpadeStat Initialized\n");
    }

    /* parse the argument list from the rules file */
    tok= mSplit(args, " ", 20, &ntok, '\\');

    for (idx= 0; idx < ntok; idx++) {
        char *opt= tok[idx];

        if (strcmp(opt,"entropy") == 0) {
            print_entropy= 1;
            continue;
        }
        if (strcmp(opt,"condprob") == 0) {
            print_condprob= 1;
            continue;
        }
        if (strcmp(opt,"uncondprob") == 0) {
            print_uncondprob= 1;
            continue;
        }
        fprintf(stderr,"Anomaly Sensor Stat: option \"%s\" not recognized\n",opt);
    }
}



/*========================================================================*/
/*======================== SpadeThreshlearn module =======================*/
/*========================================================================*/

/* Given a packet count and a length of time, this module reports a reporting
   threshold that would have been effective in producing that number of alerts
   in that time interval.  The idea is that one might use this as a threshold
   for future runs.  The module quietly watches the network for the length of
   time, adding events to the tree and calculating anomaly scores.  When the
   time period is up, the module calls exit() after reporting the top anomaly
   scores seen to the log file. */
   
/* snort config file line:
	preprocessor spade-threshlearn: [ <num-scores> [ <obs-time> ]]
	where:
	  <num-scores> is the number of packets to report for (default 200)
	  <obs-time> is the number of hours to run for (default 24)
*/

/* variables used in the threshold learning module */
int tl_obs_size=0;  // the number of anomalous packets desired
time_t tl_obs_secs; // how long to observe for
ll_double *top_anom_list; // the start of the list of anomaly scores we
                          // maintain; the scores are the highest we've
                          // observed; new scores are inserted so that
                          // values increase along the list; the list is
                          // seeded by SpadeThreshlearnInit with a single
                          // 0.0 entry so it is never empty
int top_anom_list_size; // the size counter for the list; set to 1 by
                        // SpadeThreshlearnInit and incremented each time
                        // PreprocSpadeThreshlearn allocates a new entry
time_t obs_start_time=(time_t)0; // the start time of the observation, set
                                 // after the first packet we see


/* Spade threshold learning module init function:
     counts itself in pp_active, marks the anomaly score as needed, sets up
     the module per args, registers its preprocessor function, and seeds the
     top-score list with a single 0.0 entry.
     args: the text following "spade-threshlearn:" in the config file */
void SpadeThreshlearnInit(u_char *args)
{
	pp_active++;
	need_anom= 1;
	
    /* parse the argument list from the rules file */
    ParseSpadeThreshlearnArgs(args);

    /* Set the preprocessor function into the function list */
    AddFuncToPreprocList(PreprocSpadeThreshlearn);

	/* init list to contain just 0; this is to let us assume the list is not
	   empty elsewhere */
	top_anom_list= (ll_double *)malloc(sizeof(ll_double));
	top_anom_list->val= 0.0;
	/* malloc does not clear memory: terminate the list explicitly, since
	   PreprocSpadeThreshlearn follows ->next until it reaches NULL */
	top_anom_list->next= NULL;
	top_anom_list_size= 1;
	
	if (as_debug) printf("Preprocessor: SpadeThreshlearn Initialized\n");
}

/* Spade 'spade-threshlearn' argument parsing function.
     1st arg: number of scores to keep (tl_obs_size, default 200)
     2nd arg: hours to observe, stored in seconds (tl_obs_secs, default 24h) */
void ParseSpadeThreshlearnArgs(char *args)
{
    int ntok;
    char **tok= mSplit(args, " ", 20, &ntok, '\\');

    tl_obs_size= (ntok > 0) ? atoi(tok[0]) : 200;
    if (as_debug) {
        printf("observation size is %d\n",tl_obs_size);
    }

    if (ntok > 1) {
        /* the config gives (possibly fractional) hours; we keep seconds */
        double hours= atof(tok[1]);
        tl_obs_secs= (long)(hours*3600);
    } else {
        tl_obs_secs= 24*3600;
    }
    if (as_debug) {
        printf("seconds of observation is %d\n",(int)tl_obs_secs);
    }
}

/* Spade threshold learning module routine that is called with each packet.
     The first packet seen starts the observation clock.  Once a packet
     arrives later than obs_start_time + tl_obs_secs, CleanUpSpade(SIGUSR1) is
     invoked and every later call returns immediately.  For each accepted
     packet the anomaly score is offered to top_anom_list: while the list is
     still growing a new entry is allocated; afterwards a score replaces the
     head entry only if it is larger than the head's value.
     p: the packet handed to us by Snort */
void PreprocSpadeThreshlearn(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	double anom;
	ll_double *new,*prev,*l;
	static int alldone=0; /* set once the observation period has ended */

	if (alldone) return;

	if (obs_start_time == 0) { /* first packet */
		obs_start_time= packet_time;
	} else if ((long)packet_time > (obs_start_time + tl_obs_secs)) {
		CleanUpSpade(SIGUSR1);
		alldone=1;
	}

	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	anom= last_anom_score;
	
	if (top_anom_list_size <= tl_obs_size) {
		/* still filling the list: take a fresh entry */
		new= (ll_double *)malloc(sizeof(ll_double));
		top_anom_list_size++;
	} else if (anom > top_anom_list->val) {
		/* A one-entry list has nowhere to move the head to: unlinking it
		   would leave top_anom_list NULL and the scan below would
		   dereference it.  Overwrite the head in place in that case, just
		   as when the new score still belongs in front of the second
		   entry. */
		if (top_anom_list->next == NULL || anom < top_anom_list->next->val) {
			top_anom_list->val= anom; /* can just replace first */
			return;
		}
		/* recycle the head entry for the new score */
		new= top_anom_list;
		top_anom_list= top_anom_list->next;
	} else {
		return;
	}
	new->val= anom;
	/* find the first entry (after the head) whose value is not below anom */
	for (prev= top_anom_list, l=top_anom_list->next; l != NULL && anom > l->val; prev=l,l=l->next);
	/* add between prev and l */
	prev->next= new;
	new->next= l;	
}



/*========================================================================*/
/*=========================== SpadeAdapt module ==========================*/
/*========================================================================*/

/* Given a report count target and a length of time, this module tries to keep
   the reporting threshold at a level that would produce that number of alerts
   in that time interval based on what was observed in the last interval.  To
   support this, a list of the most anomalous scores seen in the current
   interval is maintained.  At the end of the interval, an ideal threshold is
   calculated based on the interval's scores.  This is combined linearly with
   the current threshold to produce the threshold for the next interval.  As a
   default option, the interval can be implemented in terms of a count of
   packets, where this count is the average number of packets seen during the
   specified time interval length; this tends to make the transitions more
   smooth and reliable since a more constant number of anomaly scores is used
   in finding the topmost anomalous ones. */

/* snort config file line:
	preprocessor spade-adapt: [ <target-count> [ <adapt-time> [ <new-weight> [ <interval-by-count> ]]]]
	where:
	  <target-count> is the number of packets to aim for (default 20)
	  <adapt-time> is the number of hours in the interval (default 2)
	  <new-weight> is the part of new threshold based on the observed ideal for
	    the previous interval (where the rest depends on the previous threshold)
	    (default 0.5)
	  <interval-by-count> is whether to measure intervals by count (0 to
	    measure strictly by time, 1 to do it by count) (default 1)
*/

/* global-scope variables used in the adapt module; the first four are set
   by ParseSpadeAdaptArgs, the list and its size by SpadeAdaptInit */
// the number of alerts that is ideal for the given length of time
int adapt_target=0;
// the length of time (in seconds) in which to ideally produce the given
//   number of alerts; also the interval at which to adjust the report
//   threshold
time_t adapt_period;
// the weight to give to the new observation ideal cutoff in determining the
//   new threshold (the current threshold gets 1 minus this; see do_adapt)
float new_obs_weight;
// adapt by count or by time only
int adapt_by_count;
// the head of the list of anomaly scores.  This list is like the one in the
//   threshold learning module above, except that it always holds at least
//   two entries
ll_double *top_adapt_list;
// the current size of this list (0-based: it is 1 for the initial two-entry
//   list)
int top_adapt_list_size;


/* Spade adapt module init function:
     refuses to run if another adapting module is already active; otherwise
     claims the adapting role, sets up the adapt module per its args,
     registers its preprocessor function and seeds the score list with two
     0.0 entries */
void SpadeAdaptInit(u_char *args)
{
    ll_double *bottom;
    ll_double *runner_up;

    if (adapting) {
        fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
        return;
    }
    adapting= 1;
    pp_active++;
    need_anom= 1;

    /* parse the argument list from the rules file */
    ParseSpadeAdaptArgs(args);

    /* Set the preprocessor function into the function list */
    AddFuncToPreprocList(PreprocSpadeAdapt);

    /* init list to contain 0 and 0; this is to let us assume the list has a
       bottom and runner-up elsewhere */
    bottom= (ll_double *)malloc(sizeof(ll_double));
    bottom->val= 0.0;
    runner_up= (ll_double *)malloc(sizeof(ll_double));
    runner_up->val= 0.0;
    runner_up->next= NULL;
    bottom->next= runner_up;
    top_adapt_list= bottom;
    top_adapt_list_size= 1;

    if (as_debug) {
        printf("Preprocessor: SpadeAdapt Initialized\n");
    }
}

/* Spade 'spade-adapt' argument parsing function.
     1st arg: target alert count (adapt_target, default 20)
     2nd arg: interval length in hours, stored in seconds (adapt_period,
              default 2 hours)
     3rd arg: weight of the newly observed ideal threshold (new_obs_weight,
              default 0.5)
     4th arg: whether to measure intervals by count (adapt_by_count,
              default 1) */
void ParseSpadeAdaptArgs(char *args)
{
    int ntok;
    double hours= 2;
    char **tok= mSplit(args, " ", 20, &ntok, '\\');

    adapt_target= (ntok > 0) ? atoi(tok[0]) : 20;

    if (ntok > 1) {
        hours= atof(tok[1]);
    }
    adapt_period= (long)(hours*3600);

    if (as_debug) {
        printf("adapt target count is %d\n",adapt_target);
        printf("adapt target period is %d\n",(int)adapt_period);
    }

    if (ntok > 2) {
        new_obs_weight= (float)atof(tok[2]);
    } else {
        new_obs_weight= 0.5;
    }

    adapt_by_count= (ntok > 3) ? atoi(tok[3]) : 1;
}

/* Spade adapt module routine that is called with each packet.
     Step 1: when the packet's timestamp is past the end of the current time
             interval, either start the clock (first packet), or adapt the
             threshold by time (when not adapting by count, or during the
             first period) and, when adapting by count, refresh the average
             packets-per-interval figure.
     Step 2: hand the packet to record_maybe_skip(); skipped packets stop here.
     Step 3: when adapting by count, adapt as soon as more packets than the
             average per interval have been recorded since the last reset.
     Step 4: offer the packet's anomaly score to top_adapt_list.
     p: the packet handed to us by Snort */
void PreprocSpadeAdapt(Packet *p)
{
	/* see if time to adjust the rate and if so, do so, and reset */
	size_t packet_time= p->pkth->ts.tv_sec;
	ll_double *new,*prev,*l;
	// when the time interval is time-based, this is when the current interval
	//   started; otherwise this is the last time the average packets per
	//   interval was updated
	static time_t last_adapt_time=(time_t)0;
	// the time period #, starting with 1 for the first interval
	static int time_period_num= 1;
	// the average number of packets per time interval as most recently
	//   calculated
	static float average_pkt_rate;
	
	if ((long)packet_time > (last_adapt_time + adapt_period)) {
		if (last_adapt_time == 0) { /* first packet */
			last_adapt_time= packet_time;
			time_period_num= 1;
		} else {
			if (!adapt_by_count || time_period_num <= 1) { /* adapt by time since not doing count or since this is first period */
				if (as_debug) {
					printf("%d alerts in last time period (of %d)\n",recent_alert_count,recent_packets);
				}
				do_adapt();
			}
			if (adapt_by_count) { /* collect packet rate stats */
				/* total packets so far divided by the number of periods so far */
				average_pkt_rate= tot_packets/(float)time_period_num;
				if (as_debug) {
					/* alert_count as of the previous period end, for the delta below */
					static int last_repcount;
					printf("End of time period %d: ave pkt rate is now %.2f\n",time_period_num,average_pkt_rate);
					printf("  %d alerts in last time period; ave alert rate is %.2f\n",(alert_count-last_repcount),alert_count/(float)time_period_num);
					last_repcount= alert_count;
				}
				time_period_num++;
			}
			/* advance by exactly one period, not to the packet's time */
			last_adapt_time+= adapt_period;
		}
	}
	
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */

	if (adapt_by_count) { /* we are adapting by count */
		if (time_period_num > 1 && recent_packets > average_pkt_rate) { /* time to adapt; note that average_pkt_rate can be adjusted any time in our counting */
			if (as_debug) {
				printf("%d alerts in last packet period (of %d)\n",recent_alert_count,recent_packets);
			}
			do_adapt();
		}
	}
	
	/* add anomaly score to list if it is high enough */
	if (top_adapt_list_size <= adapt_target) {
		/* list still growing: take a fresh entry */
		new= (ll_double *)malloc(sizeof(ll_double));
		top_adapt_list_size++;
	} else if (last_anom_score > top_adapt_list->val) {
		if (last_anom_score < top_adapt_list->next->val) {
			top_adapt_list->val= last_anom_score; /* can just replace first */
			return;
		}
		/* recycle the head entry for the new score */
		new= top_adapt_list;
		top_adapt_list= top_adapt_list->next;
	} else {
		return;
	}
	new->val= last_anom_score;
	/* find the first entry (after the head) whose value is not below the score */
	for (prev= top_adapt_list, l=top_adapt_list->next; l != NULL && last_anom_score > l->val; prev=l,l=l->next);
	/* add between prev and l */
	prev->next= new;
	new->next= l;
}

/* Adjust the reporting threshold from the scores gathered in the interval
   that just ended, then reset the per-interval state.
   The observed ideal threshold is the midpoint of the first two entries of
   top_adapt_list.  If reporting was off (negative threshold) that value is
   used directly; otherwise it is blended with the current threshold using
   new_obs_weight.  Afterwards every list entry is zeroed and the recent
   alert and packet counters are cleared. */
void do_adapt() {
	ll_double *node= top_adapt_list;
	double obs_thresh= (top_adapt_list->val + top_adapt_list->next->val)/2;

	if (as_debug) {
		printf("observed recent ideal threshold is %.4f\n",obs_thresh);
	}

	set_new_threshold(report_anom_thres < 0.0
		? obs_thresh /* started up with no reporting */
		: (1-new_obs_weight)*report_anom_thres + new_obs_weight*obs_thresh);

	if (as_debug) {
		printf("new threshold is %.4f\n",report_anom_thres);
	}

	/* wipe the score list for the next interval, keeping its entries */
	while (node != NULL) {
		node->val= 0.0;
		node= node->next;
	}
	recent_alert_count= 0;
	recent_packets= 0;
}



/*========================================================================*/
/*========================== SpadeAdapt2 module ==========================*/
/*========================================================================*/

/* Given an hourly alert target count (or target fraction) and a length of
   time, this module tries to keep the reporting threshold at a level that
   would produce that number of alerts (or fraction of total reports) in an
   hour based on what has been observed in the past.  When the report threshold
   is updated, it is based in equal parts on observations from the short term,
   middle term, and long term (at least for these that have been observed). 
   The user can specify the time period for observations, the number of those
   that make up the short term (NS), the number of short terms that make up the
   medium term (NM), and the number of medium terms that make up the long term
   (NL).  The short term component of the threshold is defined to be the
   average of the kth and (k+1)st highest anomaly scores in the last NS
   complete periods of observation, where k is number of anomaly reports that
   should occur in the observation period assuming a uniform rate.  The middle
   term component is the average of the last NM special short term components. 
   The special short term components are the ones that are multiples of NS if
   labeled with the number of observation periods that had completed when it
   was calculated (i.e., #NS, #2NS, #3NS, etc.); these have the property that
   they are based entirely on distinct measurements.  The long term component
   is based on the last NL medium term components, including the current one. 
   For each of the components, if there have been fewer than the specified
   number of constituent parts (but there has been at least one complete one),
   what is observed thus far is used.  To accommodate the varying rates of
   packets fairly, the observation period is based on a count of packets.  This
   count is the product of the specified observation period and the average
   packet rate.
*/

/* snort config file line:
	preprocessor spade-adapt2: [ <target-spec> [ <obs-time> [ <NS> [ <NM> [ <NL> ]]]]]
	where:
	  <target-spec> if >= 1, is the number of alerts to aim for in an hour, and
	    if < 1, is the fraction of packets to aim for (default 0.01)
	  <obs-time> is the number of minutes in an observation period (default 15)
	  <NS> is the number of observation periods that make up the short term
	    (default 4)
	  <NM> is the number of short terms in the medium term (default 24)
	  <NL> is the number of medium terms in the long term (default 7)
*/

/* global-scope variables used in the adapt2 module */
// the first and second arguments from the config line; obsper is kept in
// seconds (ParseSpadeAdapt2Args converts it from minutes)
double adapt2_targetspec,obsper;
// the 3rd, 4th, and 5th args
int NS,NM,NL;
// the current target based on adapt2_targetspec
int adapt2_target;
// latest middle and long term components
double mid_anom_comp,long_anom_comp;
// representation of an array of observation lists, the heads and tails;
// SpadeAdapt2Init allocates NS entries for each
dll_double **obslists_head,**obslists_tail;
// an array of the (0-based) size of these lists; also NS entries
int *obslists_size;
// the number of complete observation periods
int obsper_count;
// arrays of short and medium term components used for calculating other
// components; SpadeAdapt2Init allocates NM and NL entries respectively
double *recScomps,*recMcomps;

/* Spade adapt2 module init function:
     refuses to run if another adapting module is already active; otherwise
     claims the adapting role, sets up the adapt2 module per its args,
     registers its preprocessor function, and allocates the NS observation
     lists (each starting as two linked 0.0 entries) plus the arrays of
     recent short and medium term components */
void SpadeAdapt2Init(u_char *args)
{
    int slot;

    if (adapting) {
        fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
        return;
    }
    adapting= 1;
    pp_active++;
    need_anom= 1;

    /* parse the argument list from the rules file */
    ParseSpadeAdapt2Args(args);

    /* Set the preprocessor function into the function list */
    AddFuncToPreprocList(PreprocSpadeAdapt2);

    /* one head, tail and size per short-term observation slot */
    obslists_head= (dll_double **)malloc(NS * sizeof(dll_double *));
    obslists_tail= (dll_double **)malloc(NS * sizeof(dll_double *));
    obslists_size= (int *)malloc(NS * sizeof(int));
    slot= 0;
    while (slot < NS) {
        /* each list starts as head <-> tail, both holding 0.0 */
        obslists_head[slot]= new_dlink(0.0);
        obslists_tail[slot]= new_dlink(0.0);
        obslists_head[slot]->next= obslists_tail[slot];
        obslists_tail[slot]->prev= obslists_head[slot];
        obslists_size[slot]= 1;
        slot++;
    }
    obsper_count= 0;
    recScomps= (double *)malloc(NM * sizeof(double));
    recMcomps= (double *)malloc(NL * sizeof(double));

    if (as_debug) {
        printf("Preprocessor: SpadeAdapt2 Initialized\n");
    }
}

/* Spade 'spade-adapt2' argument parsing function.
     1st arg: target spec (adapt2_targetspec, default 0.01); >= 1 is an
              hourly alert count, < 1 is a fraction of packets
     2nd arg: observation period in minutes, stored in seconds (obsper,
              default 15 minutes)
     3rd-5th: NS, NM and NL (defaults 4, 24 and 7)
     Also derives the initial adapt2_target from the first two args.
     NS, NM and NL are used as allocation sizes in SpadeAdapt2Init, and NS as
     a modulo divisor in PreprocSpadeAdapt2, so a value below 1 is reported on
     stderr and replaced by its default. */
void ParseSpadeAdapt2Args(char *args)
{
    char **toks;
    int numToks;

    toks = mSplit(args, " ", 20, &numToks, '\\');
   
	if (numToks > 0) {
		adapt2_targetspec= atof(toks[0]); /* if >= 1, is an hourly count, else is a fraction of total packets */
	} else {
		adapt2_targetspec= 0.01;
	}
	if (numToks > 1) {
		obsper= atof(toks[1])*60.0; /* basic observation/adjust time in mins, converted to secs */
	} else {
		obsper= 15.0*60.0;
	}
	/* 10000 packets per hour is our pure guess as to the rate of packets.
	   Is there a better way to figure out how many packets to note for our
	   first interval when we want a percent of packets? */
	adapt2_target= (int)floor(0.5+ (adapt2_targetspec >= 1 ? adapt2_targetspec*(obsper/3600.0) : ((10000/3600.0)*obsper)*adapt2_targetspec));
	if (adapt2_target==0) adapt2_target= 1; /* ensure at least 1 long */
	if (numToks > 2) {
		NS= atoi(toks[2]); /* how many of the previous go into the time observation of ideal wait and the recent portion of the adapted weight */
	} else {
		NS= 4;
	}
	if (NS < 1) { /* would give an empty allocation and a modulo by zero */
		fprintf(stderr,"Warning: spade-adapt2 NS value %d is not positive, using 4 instead\n",NS);
		NS= 4;
	}
	if (numToks > 3) {
		NM= atoi(toks[3]); /* how many of the previous go into an average to determine the middle portion of the adapted weight */
	} else {
		NM= 24;
	}
	if (NM < 1) { /* sizes the recScomps array */
		fprintf(stderr,"Warning: spade-adapt2 NM value %d is not positive, using 24 instead\n",NM);
		NM= 24;
	}
	if (numToks > 4) {
		NL= atoi(toks[4]); /* how many of the previous go into an average to determine the long-term portion of the adapted weight */
	} else {
		NL= 7;
	}
	if (NL < 1) { /* sizes the recMcomps array */
		fprintf(stderr,"Warning: spade-adapt2 NL value %d is not positive, using 7 instead\n",NL);
		NL= 7;
	}
	if (as_debug) printf("adapt2 target is %d\n",adapt2_target);
	if (as_debug) printf("%2f seconds in obs per1; %d of these in recent; %d 2's in middle; %d in long\n",obsper,NS,NM,NL);
}

/* Spade adapt2 module routine that is called with each packet.
 *
 * Three things happen here, in order:
 *  1. When the packet's timestamp is past the end of the current time-based
 *     observation period, the average packet count per period (ppc) and the
 *     alert target are recomputed.  At the end of the very first period the
 *     initial threshold is also set from observation list 0.
 *  2. The packet is recorded via record_maybe_skip(); skipped packets stop
 *     here.  Once more than ppc accepted packets have been counted, a new
 *     threshold is computed with calc_new_thresh() and the next observation
 *     list slot is reset and made current.
 *  3. last_anom_score is inserted into the current slot's list, which is
 *     kept sorted by ascending val (head is the smallest value kept).
 */
void PreprocSpadeAdapt2(Packet *p)
{
	/* see if time to adjust the rate and if so, do so, and reset */
	size_t packet_time= p->pkth->ts.tv_sec;
	dll_double *new,*prev,*l;
	int i;
	// the start time of the current observation period (0 until the first packet is seen)
	static time_t obsper_start=(time_t)0;
	// the number of accepted packets thus far in this observation
	static int obscount=0;
	// the last calculated average packet count per component;
	// used to figure out when to adjust the threshold;
	// set high initially to be sure to get a correct value before doing this
	static double ppc= 100000000.0;
	// obsper_count % NS, which obslist to add to
	static int obslist_new_slot= 0;
	
	if (packet_time > (obsper_start + obsper)) {
		// number of completed time periods; (re)initialized on the first packet
		static int rec_int_count;
		if (obsper_start == 0) { /* first packet */
			obsper_start= packet_time;
			rec_int_count= 0;
			recent_alert_count= 0;
		} else { /* time to update ppc */
			rec_int_count++;
			if (as_debug) {
				printf("%d alerts in time period %d (of %d packets)\n",recent_alert_count,rec_int_count,recent_packets);
			}
			/* average number of recorded packets per completed time period */
			ppc= tot_packets/(double)rec_int_count;
			/* advance by exactly one period, even if the packet is later than that */
			obsper_start+= (long)obsper;
			if (as_debug) {
				// alert_count as of the previous debug report
				static int last_repcount;
				printf("End of time period %d: ppc is now %.2f\n",rec_int_count,ppc);
				printf("  %d alerts in last time period; ave alert rate is %.2f\n",(alert_count-last_repcount),(float)alert_count/(float)rec_int_count);
				last_repcount= alert_count;
			}
			
			/* a spec >= 1 is an hourly count scaled to the period length;
			   otherwise it is a fraction of the average packets per period */
			adapt2_target= (int)floor(0.5+ (adapt2_targetspec >= 1 ? adapt2_targetspec*(obsper/3600.0) : adapt2_targetspec*ppc));
			if (adapt2_target==0) adapt2_target= 1; /* ensure at least 1 long */
			if (as_debug) printf("new target is %d\n",adapt2_target);
			
			if (obsper_count == 0) {
				/* end of the first time period: list 0 was filled using the
				   guessed target, so trim it to the real one and start adapting */
				obsper_count++;
				obslist_new_slot= obsper_count % NS;
				if (obslists_size[0] > adapt2_target) { /* remove excess */
					/* step l forward (obslists_size[0] - adapt2_target) nodes from the head */
					for (i= adapt2_target, l=obslists_head[0]; i < obslists_size[0]; i++,l=l->next);
					/* cut the list just before l and recycle the low-valued front part */
					l->prev->next= NULL;
					l->prev= NULL;
					free_dlinks(obslists_head[0]);
					obslists_head[0]= l;
					/* NOTE(review): obslists_size[0] is not reduced here - confirm this is intended */
				}
				/* threshold is midway between the two lowest values kept */
				set_new_threshold((obslists_head[0]->val + obslists_head[0]->next->val)/2.0);
				if (as_debug) printf("-> initial adapted threshold is %.5f\n",report_anom_thres);
				obscount= 0;
				recent_packets= 0;
				recent_alert_count= 0;
			}
		}
	}
	
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	obscount++;
	
	if (obscount > ppc) {
		/* a packet-count based period has ended: adapt the threshold and
		   move on to the next observation list slot */
		if (as_debug) {
			printf("%d alerts at end of packet period #%d (of %d)\n",recent_alert_count,obslist_new_slot,recent_packets);
		}
		
		set_new_threshold(calc_new_thresh());
		if (as_debug) printf("-> new threshold is %.5f\n",report_anom_thres);
		
		obsper_count++;
		obslist_new_slot= obsper_count % NS;
		reset_obslist(obslist_new_slot);
		obscount= 0;
		recent_packets= 0;
		recent_alert_count= 0;
	}

	if (obslists_size[obslist_new_slot] < adapt2_target) {
		/* list not yet full: always add a fresh node */
		new= new_dlink(last_anom_score);
		obslists_size[obslist_new_slot]++;
	} else if (last_anom_score > obslists_head[obslist_new_slot]->val) {
		/* list is full and the score beats the lowest kept value */
		if (last_anom_score < obslists_head[obslist_new_slot]->next->val) {
			obslists_head[obslist_new_slot]->val= last_anom_score; /* can just replace first in place*/
			return;
		}
		/* reuse the old head node for the new score and unlink it from the front */
		new= obslists_head[obslist_new_slot];
		new->val= last_anom_score;
		obslists_head[obslist_new_slot]= obslists_head[obslist_new_slot]->next;
		new->next->prev= NULL;
	} else {
		/* score is not higher than anything kept; nothing to record */
		return;
	}
	/* find the first node after the head whose val is not below the score */
	for (l=obslists_head[obslist_new_slot]->next; l != NULL && last_anom_score > l->val; l=l->next);
	/* add between l->prev and l */
	prev= (l == NULL) ? obslists_tail[obslist_new_slot] : l->prev;
	prev->next= new;
	new->prev= prev;
	new->next= l;
	if (l == NULL) {
		/* appended at the end, so new becomes the tail */
		obslists_tail[obslist_new_slot]= new;
	} else {
		l->prev= new;
	}
}

double calc_new_thresh() {
	static int per2_count=0,per3_count=0; // the count of period 2 and 3 instances

	double rec_anom_comp= thresh_from_obslists();
	if (as_debug) printf("* New recent anom observation (#%d) is %.5f\n",obsper_count,rec_anom_comp);
	if (obsper_count < (NS-1)) {
		return rec_anom_comp; /* haven't observed mid or long yet */
	}
	if (((obsper_count+1) % NS) == 0) { /* time to add new mid */
		recScomps[per2_count % NM]= rec_anom_comp;
		if (as_debug) printf("recScomps[%d]:= %.5f\n",per2_count % NM,rec_anom_comp);
		per2_count++;
		mid_anom_comp= anom_ave(recScomps,((per2_count < NM)?per2_count:NM));
		if (as_debug) printf("** New mid anom component (#%d) is %.5f\n",per2_count-1,mid_anom_comp);
		if (per2_count < (NM-1)) {
			return (rec_anom_comp+mid_anom_comp)/2.0; /* haven't observed long yet */
		}
		if ((per2_count % NM) == 0) { /* time to add new long */
			recMcomps[per3_count % NL]= mid_anom_comp;
			if (as_debug) printf("recMcomps[%d]:= %.5f\n",per3_count % NL,mid_anom_comp);
			per3_count++;	
			long_anom_comp= anom_ave(recMcomps,((per3_count < NL)?per3_count:NL));
			if (as_debug) printf("*** New long anom component (#%d) is %.5f\n",per3_count-1,long_anom_comp);
		}
	}
	if (per2_count < NM) {
		return (rec_anom_comp+mid_anom_comp)/2.0; /* haven't observed long yet */
	}
	return (rec_anom_comp+mid_anom_comp+long_anom_comp)/3.0;
}

/* Find the threshold implied by the NS observation lists taken together.
 *
 * Each list is sorted ascending, so the lists are walked backwards from
 * their tails, repeatedly taking the largest remaining value across all
 * lists.  After adapt2_target+1 picks, the result is the midpoint between
 * the adapt2_target-th and the (adapt2_target+1)-th highest scores.  If the
 * lists run out first, the last score picked (0.0 if none) is returned.
 *
 * The cursor array is heap-allocated per call and is released on every
 * return path.
 */
double thresh_from_obslists() {
	dll_double **pos= (dll_double **)malloc(NS * sizeof(dll_double *));
 	int i,c,maxpos=-1;
	double max,last_score=0.0,before_last_score=0.0;

	if (pos == NULL) FatalError("spp_anomsensor: out of memory in thresh_from_obslists");

	if (as_debug > 1) {
		dll_double *l;
		printf("thresh_from_obslists: finding score that is #%d highest in:\n",adapt2_target);
		for (i= 0; i < NS; i++) {
			printf("  slot %d: %.5f",i,obslists_head[i]->val);
			for (l=obslists_head[i]->next; l != NULL; l=l->next) {
				printf(" -> %.5f",l->val);
			}
			printf("\n");
		}
	}
	/* start each cursor at the highest value of its list */
	for (i= 0; i < NS; i++) {
		pos[i]= obslists_tail[i];
	}
	for (c= 1; c <= adapt2_target+1; c++) {
		max= -1;
		for (i= 0; i < NS; i++) {
			if (pos[i] != NULL) {
				if (max < pos[i]->val) {
					max= pos[i]->val;
					maxpos= i;
				}
				
			}
		}
		if (max == -1) { /* should only happen if we don't
		                    have enough packets recorded */
			free(pos);
			return last_score;
		}
		/* consume the chosen value by stepping that list's cursor back */
		pos[maxpos]= pos[maxpos]->prev;
		before_last_score= last_score;
		last_score= max; /* in case this is the last */
	}
	free(pos);
	return (before_last_score+last_score)/2.0;
}

/* Return the arithmetic mean of the first `size` entries of `a`.
 * With debugging enabled the values being averaged are printed first. */
double anom_ave(double a[],int size) {
	double total= 0.0;
	int k;

	if (as_debug) {
		printf("anom_ave: taking average of (%.5f",a[0]);
		k= 1;
		while (k < size) {
			printf(",%.5f",a[k]);
			k++;
		}
		printf(")\n");
	}

	k= 0;
	while (k < size) {
		total+= a[k];
		k++;
	}
	return total/(double)size;
}

/* Empty observation list `slot` back to its initial state: two nodes,
 * both holding 0.0, with every node past the second handed to
 * free_dlinks() for recycling. */
void reset_obslist(int slot) {
	dll_double *lowest= obslists_head[slot];
	dll_double *runner_up= lowest->next;
	dll_double *surplus= runner_up->next;

	if (surplus != NULL) {
		free_dlinks(surplus);
	}

	lowest->val= 0.0;
	runner_up->val= 0.0;
	runner_up->next= NULL;

	obslists_tail[slot]= runner_up;
	obslists_size[slot]= 1;
}



/*========================================================================*/
/*========================== SpadeAdapt3 module ==========================*/
/*========================================================================*/

/* Given an hourly alert target count (or target fraction) and a length of
   time, this module tries to keep the reporting threshold at a level that
   would produce that number of alerts (or fraction of total reports) in an
   hour based on what has been observed in the past.  ...
*/

/* snort config file line:
	preprocessor spade-adapt3: [ <target-spec> [ <obs-time> [ <num-obs>]]]
	where:
	  <target-spec> if >= 1, is the number of alerts to aim for in an hour, and
	    if < 1, is the fraction of packets to aim for (default 0.01)
	  <obs-time> is the number of minutes in an observation period (default 60)
	  <num-obs> is the number of observation periods to average over (default 168)
*/

/* global-scope variables used in the Adapt3 module */
// the first and second arguments from the config line: the target spec
// and the observation period (the latter is stored in seconds)
double adapt3_targetspec,adapt3_obsper;
// the 3rd arg: the number of observation periods kept in adapt3hist
int NO;
// the current target based on adapt3_targetspec
int adapt3_target;
// an array of NO past observed thresholds, used as a circular buffer
double *adapt3hist;
// a linked list of current anomaly scores, lowest value first
ll_double *adapt3anoms;
// (0-based) size of this list; it starts at 1 for the two initial nodes
int adapt3anoms_size;
// number of completed observation periods
int completed_obs_per;

/* Spade Adapt3 module init function.
 *
 * Does nothing but warn if another threshold-adapting module was already
 * configured.  Otherwise it marks adapting as active, parses the config
 * arguments, registers PreprocSpadeAdapt3 as a preprocessor and allocates
 * the history array and the initial two-node score list.
 */
void SpadeAdapt3Init(u_char *args)
{
	ll_double *bottom;
	ll_double *runner_up;

	if (adapting) {
		fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
		return;
	}

	adapting= 1;
	pp_active++;
	need_anom= 1;

	/* parse the argument list from the rules file */
	ParseSpadeAdapt3Args(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeAdapt3);

	adapt3hist= (double *)malloc(sizeof(double)*NO);

	/* init list to contain 0 and 0; this is to let us assume the list
	   has a bottom and runner-up elsewhere */
	bottom= (ll_double *)malloc(sizeof(ll_double));
	bottom->val= 0.0;
	runner_up= (ll_double *)malloc(sizeof(ll_double));
	runner_up->val= 0.0;
	runner_up->next= NULL;
	bottom->next= runner_up;
	adapt3anoms= bottom;

	adapt3anoms_size= 1;
	completed_obs_per= 0;

	if (as_debug) printf("Preprocessor: SpadeAdapt3 Initialized\n");
}

/* Spade 'spade-Adapt3' argument parsing function.
 *
 * Splits the space-separated config arguments and fills in the Adapt3
 * globals:
 *   arg 1 -> adapt3_targetspec (default 0.01)
 *   arg 2 -> adapt3_obsper, given in minutes and stored in seconds
 *   arg 3 -> NO, the number of observation periods averaged over
 *            (default 168)
 * and derives the initial adapt3_target from the first two.
 *
 * NO is later used as a modulo divisor and as the size of the history
 * array, so a value below 1 (including the 0 that atoi() yields for
 * non-numeric text) is rejected with a warning and replaced by the default.
 *
 * NOTE(review): the config-line documentation for this module states a
 * default observation time of 60 minutes, but the code below uses 15.
 * Left unchanged here; confirm which one is intended.
 */
void ParseSpadeAdapt3Args(char *args)
{
    char **toks;
    int numToks;

    toks = mSplit(args, " ", 20, &numToks, '\\');
   
	if (numToks > 0) {
		adapt3_targetspec= atof(toks[0]); /* if >= 1, is an hourly count, else is a fraction of total packets */
	} else {
		adapt3_targetspec= 0.01;
	}
	if (numToks > 1) {
		adapt3_obsper= atof(toks[1])*60.0; /* basic observation/adjust time in mins, converted to secs */
	} else {
		adapt3_obsper= 15.0*60.0;
	}
	/* 10000 packets per hour is our pure guess as to the rate of packets.
	   Is there a better way to figure out how many packets to note for our
	   first interval when we want a percent of packets? */
	adapt3_target= (int)floor(0.5+ (adapt3_targetspec >= 1 ? adapt3_targetspec*(adapt3_obsper/3600.0) : ((10000/3600.0)*adapt3_obsper)*adapt3_targetspec));
	if (adapt3_target==0) adapt3_target= 1;
	if (numToks > 2) {
		NO= atoi(toks[2]); /* how many past observation periods are kept and averaged */
	} else {
		NO= 168;
	}
	if (NO < 1) { /* would be a zero/negative modulo divisor and array size */
		fprintf(stderr,"spp_anomsensor: spade-adapt3 observation count must be at least 1 (got %d); using 168\n",NO);
		NO= 168;
	}
	if (as_debug) printf("Adapt3 target is %d\n",adapt3_target);
	if (as_debug) printf("%2f seconds in obs per; %d of these in history\n",adapt3_obsper,NO);
}

/* Spade Adapt3 module routine that is called with each packet.
 *
 * When the packet's timestamp is past the end of the current time-based
 * observation period, the average packet count per interval (ppi) and the
 * alert target are recomputed; at the end of the very first period the
 * score list is trimmed to the target and do_adapt3() is run.  The packet
 * is then recorded via record_maybe_skip(); for accepted packets,
 * do_adapt3() is run again once more than ppi packets have been counted,
 * and finally last_anom_score is inserted into adapt3anoms, a singly
 * linked list kept sorted by ascending val.
 */
void PreprocSpadeAdapt3(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	ll_double *prev,*newstart,*next,*new;
	int i;
	// the start time of the current observation period (0 until the first packet is seen)
	static time_t adapt3_obsper_start=(time_t)0;
	// the number of accepted packets thus far in this observation
	static int obscount=0;
	// the last calculated average packet count per interval; used to figure out when to adjust the threshold; set high initially to be sure to get a correct value before doing this
	static double ppi= 100000000.0;
	
	/* see if time to adjust the rate and if so, do so, and reset */
	if (packet_time > (adapt3_obsper_start + adapt3_obsper)) {
		// number of completed time periods; (re)initialized on the first packet
		static int rec_int_count;
		if (adapt3_obsper_start == 0) { /* first packet */
			adapt3_obsper_start= packet_time;
			rec_int_count= 0;
			recent_alert_count= 0;
		} else { /* time to update ppi */
			rec_int_count++;
			/* average number of recorded packets per completed time period */
			ppi= tot_packets/(double)rec_int_count;
			/* advance by exactly one period, even if the packet is later than that */
			adapt3_obsper_start+= (long)adapt3_obsper;
			if (as_debug) printf("End of time period %d: ppi is now %.2f\n",rec_int_count,ppi);
			
			/* a spec >= 1 is an hourly count scaled to the period length;
			   otherwise it is a fraction of the average packets per period */
			adapt3_target= (int)floor(0.5+ (adapt3_targetspec >= 1 ? adapt3_targetspec*(adapt3_obsper/3600.0) : adapt3_targetspec*ppi));
			if (adapt3_target==0) adapt3_target= 1;
			if (as_debug) printf("new target is %d\n",adapt3_target);
			
			if (completed_obs_per == 0) {
				/* end of the first time period: the list was filled using the
				   guessed target, so trim it to the real one before adapting */
				if (adapt3anoms_size > adapt3_target) { /* remove excess */
					/* stop prev on the last node to drop; everything after it is kept */
					for (i= adapt3_target, prev=adapt3anoms; (i+1) < adapt3anoms_size; i++,prev=prev->next);
					newstart= prev->next;
					prev->next= NULL;
					free_links(adapt3anoms);
					adapt3anoms= newstart;
					/* NOTE(review): adapt3anoms_size is not reduced here - confirm this is intended */
				}
				do_adapt3();
				obscount= 0;
			}
		}
	}
	
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	obscount++;
	
	if (obscount > ppi) {
		/* a packet-count based period has ended */
		if (as_debug) {
			printf("%d alerts at end of packet period #%d (of %d)\n",recent_alert_count,completed_obs_per+1,recent_packets);
		}
		do_adapt3();
		obscount= 0;
	}

	/* add anomaly score to list if it is high enough */
	if (adapt3anoms_size <= adapt3_target) {
		/* list not yet full: always add a fresh node */
		new= new_link(last_anom_score);
		adapt3anoms_size++;
	} else if (last_anom_score > adapt3anoms->val) {
		if (last_anom_score < adapt3anoms->next->val) {
			adapt3anoms->val= last_anom_score; /* can just replace first */
			return;
		}
		/* reuse the old head node for the new score and drop it from the front */
		new= adapt3anoms;
		new->val= last_anom_score;
		adapt3anoms= adapt3anoms->next;
	} else {
		/* score is not higher than anything kept; nothing to record */
		return;
	}
	/* find the first node after the head whose val is not below the score */
	for (prev= adapt3anoms, next=adapt3anoms->next; next != NULL && last_anom_score > next->val; prev=next,next=next->next);
	/* add between prev and next */
	prev->next= new;
	new->next= next;
}

/* Close out one Adapt3 observation period.
 *
 * The midpoint of the two lowest scores kept in adapt3anoms is stored in
 * the circular adapt3hist array (replacing the oldest entry once NO
 * periods have completed), the reporting threshold is set to the average
 * of the stored entries, and the score list and the recent counters are
 * zeroed for the next period.
 */
void do_adapt3() {
	/* running sum of all entries currently held in adapt3hist */
	static double hist_total= 0;
	double observed= (adapt3anoms->val + adapt3anoms->next->val)/2;
	ll_double *node;
	int idx;
	int averaged;

	if (as_debug) printf("observed recent ideal threshold for adapt3 is %.4f\n",observed);

	idx= completed_obs_per % NO;
	completed_obs_per++;
	if (completed_obs_per > NO) {
		/* kicking a score out */
		hist_total-= adapt3hist[idx];
	}
	adapt3hist[idx]= observed;
	hist_total+= observed;

	if (as_debug > 1) {
		int k= 1;
		printf("adapt3hist= [");
		printf("%.4f",adapt3hist[0]);
		while (k < NO && k < completed_obs_per) {
			printf(",%.4f",adapt3hist[k]);
			k++;
		}
		printf("]\n");
	}

	/* average over the number of history entries filled so far */
	averaged= (completed_obs_per >= NO) ? NO : completed_obs_per;
	set_new_threshold(hist_total/averaged);
	if (as_debug) printf("new threshold is %.4f\n",report_anom_thres);

	node= adapt3anoms;
	while (node != NULL) {
		node->val= 0.0;
		node= node->next;
	}
	recent_alert_count= 0;
	recent_packets= 0;
}


/*========================================================================*/
/*========================== SpadeSurvey module ==========================*/
/*========================================================================*/

/* This module surveys the anomaly scores observed across periods of time
and reports this to a specified survey file.  The period #, the packet
count, the median score, the 90th percentile score, and the 99th percentile
score are recorded to the file in tab-delimited format.  Interpolation is
used between scores if there is no score at exactly the position implied by
the percentile. */

/* efficiency note:  This uses a linked list to represent the observed anomaly scores.  While it is necessary to maintain all these scores (the current worst score might end up being the 99th percentile), a different representation (order stat tree?) should be used if the packet count gets high.  */

/* snort config file line:
	preprocessor spade-survey: [ <survey-file> [ <observation-period> ]]
	where:
	  <survey-file> the file to write the survey results to (default is stdout)
	  <observation-period> the interval for the survey in minutes (default 60)
*/

/* global-scope variables used in the survey module */
// the survey log file handle (NULL until the survey module is configured)
FILE *survey_log= NULL;
// the list of anomaly scores for the current survey period
ll_double *survey_list;
// the length of the list (1-based)
int survey_list_len;
// the number of seconds in the survey interval
float survey_interval;
// the survey period number (starts with 1)
int survey_period;

/* Spade survey module init function.
 *
 * Parses the module's config arguments, registers PreprocSpadeSurvey as a
 * preprocessor, writes the column header line to the survey log and
 * starts with an empty score list in survey period 1.
 */
void SpadeSurveyInit(u_char *args)
{
	/* this module is active and needs anomaly scores calculated */
	need_anom= 1;
	pp_active++;

	/* parse the argument list from the rules file */
	ParseSpadeSurveyArgs(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeSurvey);

	/* nothing observed yet */
	survey_period= 1;
	survey_list_len= 0;
	survey_list= NULL;

	fprintf(survey_log,"%.2f minute interval #\tPacket Count\tMedian Anom\t90th Percentile Anom\t99th Percentile Anom\n",survey_interval/60.0);

	if (as_debug) {
		printf("Preprocessor: SpadeSurvey Initialized\n");
	}
}

/* Spade 'spade-survey' argument parsing function.
 *
 * arg 1 names the survey output file, which is opened for writing
 * (stdout is used when it is absent); arg 2 is the survey interval in
 * minutes, stored in survey_interval as seconds (default one hour).
 */
void ParseSpadeSurveyArgs(char *args)
{
    int tok_count;
    char **fields= mSplit(args, " ", 20, &tok_count, '\\');

	if (tok_count <= 0) {
		survey_log= stdout;
	} else {
		survey_log= fopen(fields[0],"w");
		if (survey_log == NULL) {
			FatalError("spp_anomsensor: unable to open %s to record survey",fields[0]);
		}
	}

	survey_interval= (tok_count > 1) ? (float)(atof(fields[1])*60.0) : 60*60;

	if (as_debug) {
		printf("seconds of survey interval is %d\n",(int)survey_interval);
	}
}

/* Spade survey module routine that is called with each packet.
 *
 * For every survey interval that has ended before this packet's timestamp,
 * one line (period number, packet count, median, 90th and 99th percentile
 * score) is written to the survey log and the score list is emptied.  The
 * packet is then recorded via record_maybe_skip(); for accepted packets
 * last_anom_score is inserted into survey_list, which is kept sorted by
 * ascending val.
 */
void PreprocSpadeSurvey(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	double anom;
	ll_double *new,*prev,*next;
	// the start time for this survey interval
	static time_t survey_interval_start_time=(time_t)0;
	// the number of packets seen in this survey period so far
	static int survey_rec_count= 0;

	/* survey_interval is a float, so without the casts the whole comparison
	   would be carried out in float, whose 24-bit mantissa cannot represent
	   epoch timestamps to the second; do the arithmetic in double instead */
	while ((double)packet_time > ((double)survey_interval_start_time + (double)survey_interval)) {
		if (survey_interval_start_time == 0) { /* first packet */
			survey_interval_start_time= packet_time;
		} else {
			fprintf(survey_log,"%d\t%d\t%.6f\t%.6f\t%.6f\n",survey_period,survey_rec_count,survey_ostat(0.5),survey_ostat(0.9),survey_ostat(0.99));
            fflush(survey_log);
            if (survey_list) 
			    free_links(survey_list);
			survey_list= NULL;
			survey_list_len= 0;
			survey_rec_count=0;
			survey_period++;
			survey_interval_start_time+= (long)survey_interval;
		}
	}

	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	survey_rec_count++;
	anom= last_anom_score;
	new= new_link(anom);
	
	if (survey_list == NULL) {
		survey_list= new;
		survey_list_len= 1;
	} else {
		if (anom < survey_list->val) { /* add at head */
			new->next= survey_list;
			survey_list= new;
		} else {
			/* find the first node after the head whose val is not below anom */
			for (prev= survey_list, next=survey_list->next; next != NULL && anom > next->val; prev=next,next=next->next);
			/* add between prev and next */
			prev->next= new;
			new->next= next;	
		}
		survey_list_len++;
	}
}	

/* Return the order statistic at fraction `loc` (e.g. 0.5 for the median)
 * of the scores in survey_list, which is sorted ascending.
 *
 * The 1-based position is (survey_list_len-1)*loc+1.  If that position
 * falls between two list elements the result is linearly interpolated
 * between them.  Returns 0.0 for an empty list.
 *
 * The walk stops ON the element at floor(position), never past it, so
 * that the interpolation uses that element and its successor and so that
 * a position equal to the list length cannot run off the end of the list.
 */
double survey_ostat(double loc) {
	ll_double *pos;
	int p;
	double fromnext;
	double posnum;
	
	//printf("loc= %f\n",loc);
	if (survey_list_len == 0) return 0.0;
	posnum= loc*(double)survey_list_len + (1-loc);/* = (survey_list_len-1)*loc+1 */

	/* advance until pos is element number p == floor(posnum) */
	for (p= 1, pos=survey_list; (p+1) <= posnum && pos->next != NULL; p++,pos=pos->next);
	/* fraction of the way from element p towards element p+1 */
	fromnext= posnum-(double)p;
	if (fromnext <= 0 || pos->next == NULL) { /* got it exactly */
		return pos->val;
	} else {
		return (pos->val*(1-fromnext))+(pos->next->val*fromnext);
	}
}

/*********************************************************************/
/*********************************************************************/

/* Record packet `p` in the probability tables unless it is to be skipped.
 *
 * Several Spade preprocessor routines may call this for the same packet;
 * pp_run_on_pkt counts those calls so that the real work is only done by
 * the first caller.  On that first call the trees are scaled and pruned if
 * SCALE_FREQ seconds have passed, the packet is checked (it must be a TCP
 * packet whose flags equal 2 and, if a home network list exists, be
 * destined for one of those networks), recorded, and, if need_anom is set,
 * scored into last_anom_score.  Later callers just get the cached skip
 * decision.
 *
 * Returns 1 if the packet is skipped, 0 if it was accepted.
 */
int record_maybe_skip(Packet *p) {
	valtype val[NUM_FEATURES];
	ll_net *home;
	size_t packet_time= p->pkth->ts.tv_sec;
	static time_t last_scale=(time_t)0; // the last time the tree was scaled
	
	if (pp_run_on_pkt == pp_active || !pp_run_on_pkt) { /* first time this packet hit */
		while (packet_time - last_scale > SCALE_FREQ) {
			if (last_scale == (size_t)0) { /* this is the first packet */
				last_scale= packet_time;
			} else {
				/* packet_time is a size_t, so it is cast to match its conversion specifier */
				if (as_debug > 1) printf("scaling by %f at time %lu; discarding at %f\n",SCALE_FACTOR,(unsigned long)packet_time,MIN_NODE_SIZE);
				scale_and_prune_all_trees(SCALE_FACTOR,MIN_NODE_SIZE);
				last_scale+= SCALE_FREQ;  /* lets pretend we did this right on time */
				if (as_debug > 1) printf("done with scale/prune\n");
			}
		}
		
		skip_packet= p->iph == NULL || p->tcph == NULL || p->iph->ip_proto != IPPROTO_TCP || p->tcph->th_flags != 2;  /* is this a TCP SYN? */
		if (!skip_packet && homelist != NULL) {
			skip_packet= 1; /* skip unless is in a homenet */
			for (home= homelist; home != NULL; home=home->next) {
				if ((p->iph->ip_dst.s_addr & home->netmask) == home->netaddr) {
					skip_packet= 0;
					break;
				}
			}
		}
		if (skip_packet) return 1;

		record_packet(p,val);
		pp_run_on_pkt= 1; /* restart the per-packet caller count */
		last_pkt_time= packet_time;
	
		if (as_debug && (tot_packets % 10000) == 0) {
			printf("packet # %d has been added\n",tot_packets);
		}
		if ((tot_packets % checkpoint_freq) == 0) {
			/* a state file name of "0" means no checkpointing */
			if (strcmp(statefile,"0")) checkpoint(statefile);
		}

		if (need_anom) {
			last_anom_score= calc_anom(val);
			//printf("last_anom_score=%f\n",last_anom_score);
		}
	} else {
		/* another module already handled this packet; reuse its decision */
		pp_run_on_pkt++;
		if (skip_packet) return 1;
	}
	
	return 0;
}


/* Calculate the anomaly score for the feature values in `val`.
 *
 * The score is -log2(P), where the probability P is estimated according
 * to prob_mode:
 *   0: P(dport) * P(sip|dport,sport) * P(sport|dport) * P(dip|sport,sip)
 *   1: joint probability of the first 4 features in fl (DIP,DPORT,SIP,SPORT)
 *   2: joint probability of the first 3 features in fl (DIP,DPORT,SIP)
 *   3: joint probability of DIP and DPORT
 * Every mode divides log(P) by LOG2 so that all scores are in the same
 * units; the division has to be applied to the logarithm, not to its
 * argument.
 *
 * Returns 9999999.0 for an unrecognized prob_mode and 999999.0 when
 * `parts` is set.
 */
double calc_anom(valtype val[]) {
	double prob;
	if (!parts) {
		features fl[]= {DIP,DPORT,SIP,SPORT};
		/* NOTE(review): vl holds feature values but is declared with the
		   `features` type; confirm against prob_Njoint's prototype */
		features vl[]= {val[DIP],val[DPORT],val[SIP],val[SPORT]};
		if (prob_mode == 0) {
			prob= prob_simple(DPORT,val[DPORT]) *  /* P(dport) */
				prob_cond2(SIP,val[SIP],DPORT,val[DPORT],SPORT,val[SPORT]) *  /* P(sip|dport,sport) */
				prob_cond1(SPORT,val[SPORT],DPORT,val[DPORT]) *  /* P(sport|dport) */
				prob_cond2(DIP,val[DIP],SPORT,val[SPORT],SIP,val[SIP]);  /* P(dip|sport,sip) */
		} else if (prob_mode == 1) {
			prob= (double)prob_Njoint(4,fl,vl);
		} else if (prob_mode == 2) {
			prob= (double)prob_Njoint(3,fl,vl);
		} else if (prob_mode == 3) {
			prob= (double)prob_2joint(DIP,val[DIP],DPORT,val[DPORT]);
		} else {
			return 9999999.0;
		}
		return -1.0*(log(prob)/LOG2);
	} else return 999999.0;
}

/* Count packet `p` in the probability tables.
 *
 * The source/destination address and port are copied into `val` (indexed
 * by SIP, DIP, SPORT and DPORT) for the caller, the recent and total
 * packet counters are bumped, and the joint counts needed for the current
 * configuration are incremented: selected by `part` when `parts` is set,
 * and by prob_mode otherwise.
 *
 * The order of the increment calls within each case is significant and
 * must be kept: calls with a non-zero last argument look up entries that
 * an earlier call is expected to have counted already.
 */
void record_packet(Packet *p,valtype val[]) {
	valtype src_ip,dst_ip,src_port,dst_port;

	recent_packets++;
	tot_packets++;

	val[SIP]= p->iph->ip_src.s_addr;
	val[DIP]= p->iph->ip_dst.s_addr;
	val[SPORT]= p->sp;
	val[DPORT]= p->dp;
	/* TTL (p->iph->ip_ttl) and WIN (p->tcph->th_win) are currently not recorded */

	src_ip= val[SIP];
	dst_ip= val[DIP];
	src_port= val[SPORT];
	dst_port= val[DPORT];

	if (as_debug > 2) {
		char src_txt[16];
		char dst_txt[16];
		/* inet_ntoa's result is copied right away, before the second call */
		strncpy(src_txt, inet_ntoa(p->iph->ip_src), 16);
		strncpy(dst_txt, inet_ntoa(p->iph->ip_dst), 16);
		printf("adding %s, %s, %d, %d\n",src_txt,dst_txt,src_port,dst_port);
	}

	if (!parts) {
		/* only the counts needed by the selected probability mode */
		if (prob_mode == 0) {
			increment_3joint_count(SPORT,src_port,SIP,src_ip,DIP,dst_ip,0);
			increment_3joint_count(DPORT,dst_port,SPORT,src_port,SIP,src_ip,0);
		} else if (prob_mode == 1) {
			increment_4joint_count(DIP,dst_ip,DPORT,dst_port,SIP,src_ip,SPORT,src_port,0);
		} else if (prob_mode == 2) {
			increment_3joint_count(DIP,dst_ip,DPORT,dst_port,SIP,src_ip,0);
		} else if (prob_mode == 3) {
			increment_2joint_count(DIP,dst_ip,DPORT,dst_port,0);
		}
		return;
	}

	if (part == 0) {
		/* full all at once: record needed conditional probabilities */
		increment_4joint_count(SIP,src_ip,DIP,dst_ip,SPORT,src_port,DPORT,dst_port,0);
		increment_4joint_count(DIP,dst_ip,SPORT,src_port,DPORT,dst_port,SIP,src_ip,0);
		increment_4joint_count(SIP,src_ip,DIP,dst_ip,DPORT,dst_port,SPORT,src_port,2);
		increment_4joint_count(SIP,src_ip,SPORT,src_port,DPORT,dst_port,DIP,dst_ip,1);

		increment_3joint_count(SIP,src_ip,DPORT,dst_port,DIP,dst_ip,1);
		increment_3joint_count(DIP,dst_ip,DPORT,dst_port,SIP,src_ip,1);

		increment_3joint_count(SIP,src_ip,SPORT,src_port,DIP,dst_ip,2);
		increment_3joint_count(DIP,dst_ip,SPORT,src_port,SIP,src_ip,2);

		increment_3joint_count(SIP,src_ip,DPORT,dst_port,SPORT,src_port,2);
		increment_3joint_count(SPORT,src_port,DPORT,dst_port,SIP,src_ip,0);

		increment_2joint_count(DIP,dst_ip,SIP,src_ip,1);
		increment_2joint_count(SPORT,src_port,SIP,src_ip,1);
		increment_2joint_count(DPORT,dst_port,SIP,src_ip,0);
		increment_2joint_count(SPORT,src_port,DIP,dst_ip,1);
		increment_2joint_count(DPORT,dst_port,DIP,dst_ip,1);
		increment_2joint_count(DPORT,dst_port,SPORT,src_port,1);
	} else if (part == 1) {
		increment_4joint_count(SIP,src_ip,DIP,dst_ip,SPORT,src_port,DPORT,dst_port,0);
		increment_4joint_count(SIP,src_ip,DIP,dst_ip,DPORT,dst_port,SPORT,src_port,2);
	} else if (part == 2) {
		increment_4joint_count(SIP,src_ip,SPORT,src_port,DPORT,dst_port,DIP,dst_ip,0);
		increment_3joint_count(SIP,src_ip,SPORT,src_port,DIP,dst_ip,2);
	} else if (part == 3) {
		increment_3joint_count(SIP,src_ip,DPORT,dst_port,DIP,dst_ip,0);
		increment_3joint_count(SIP,src_ip,DPORT,dst_port,SPORT,src_port,2);
	} else if (part == 4) {
		increment_2joint_count(DIP,dst_ip,SIP,src_ip,0);
		increment_3joint_count(DIP,dst_ip,SPORT,src_port,SIP,src_ip,1);
		increment_4joint_count(DIP,dst_ip,SPORT,src_port,DPORT,dst_port,SIP,src_ip,2);
	} else if (part == 5) {
		increment_3joint_count(DIP,dst_ip,DPORT,dst_port,SIP,src_ip,0);
		increment_3joint_count(DIP,dst_ip,DPORT,dst_port,SPORT,src_port,2);
	} else if (part == 6) {
		increment_2joint_count(SPORT,src_port,DIP,dst_ip,0);
	} else if (part == 7) {
		increment_3joint_count(SPORT,src_port,DPORT,dst_port,SIP,src_ip,0);
		increment_3joint_count(SPORT,src_port,DPORT,dst_port,DIP,dst_ip,2);
	} else if (part == 8) {
		increment_3joint_count(DPORT,dst_port,SIP,src_ip,DIP,dst_ip,0);
		increment_2joint_count(DPORT,dst_port,DIP,dst_ip,1);
		increment_2joint_count(DPORT,dst_port,SPORT,src_port,1);
	} else if (part == 9) {
		/* every ordered pair of the four features */
		increment_2joint_count(SIP,src_ip,DIP,dst_ip,0);
		increment_2joint_count(SIP,src_ip,SPORT,src_port,1);
		increment_2joint_count(SIP,src_ip,DPORT,dst_port,1);
		increment_2joint_count(DIP,dst_ip,SIP,src_ip,0);
		increment_2joint_count(DIP,dst_ip,SPORT,src_port,1);
		increment_2joint_count(DIP,dst_ip,DPORT,dst_port,1);
		increment_2joint_count(SPORT,src_port,SIP,src_ip,0);
		increment_2joint_count(SPORT,src_port,DIP,dst_ip,1);
		increment_2joint_count(SPORT,src_port,DPORT,dst_port,1);
		increment_2joint_count(DPORT,dst_port,SIP,src_ip,0);
		increment_2joint_count(DPORT,dst_port,DIP,dst_ip,1);
		increment_2joint_count(DPORT,dst_port,SPORT,src_port,1);
	}
	/* parts 10 (pairs involving TTL) and 11 (pairs involving WIN) are
	   disabled, as those two features are not recorded above */
}


/* Make `t` the new reporting threshold and announce the change through
 * the alert function, together with the recent alert and packet counts.
 *
 * The message is built with snprintf so that an unusually wide threshold
 * value or large counters truncate the text rather than overrun the
 * buffer.
 */
void set_new_threshold(double t) {
	char logMessage[128];
	
	report_anom_thres= t;
	snprintf(logMessage,sizeof(logMessage),"spp_anomsensor: Threshold adjusted to %.4f after %d alerts (of %d)",report_anom_thres,recent_alert_count,recent_packets);
	(*AlertFunc)(NULL, logMessage);
}

/**********************************************************
 * Called on signals: SIGQUIT, SIGHUP and SIGUSR1 trigger
 * CleanUpSpade(); any other signal is ignored.  `arg` is
 * not used.
 *****************************************************/
void SpadeCatchSig(int signal,void *arg) {
	switch (signal) {
	case SIGQUIT:
	case SIGHUP:
	case SIGUSR1:
		CleanUpSpade(signal);
		break;
	default:
		break;
	}
}

void CleanUpSpade(int signal) 
{
	featcomb H;
    FILE *file;
    
    if (!tot_packets) return;
    
    if (strcmp(statefile,"0")) checkpoint(statefile);
    
    if (!strcmp(outfile,"-")) {
    	file= stdout;
    } else {
	    file = fopen(outfile, "w");
    	if(!file) FatalError("spp_anomsensor: unable to open %s",outfile);
    }

	fprintf(file,"%d packets recorded\n",tot_packets);
	if (alert_count > 0) fprintf(file,"%d packets reported as alerts\n",alert_count);
	
	if (tl_obs_size && top_anom_list_size > 1 && last_pkt_time-obs_start_time>0) {
		ll_double *n;
		double obs_hours= (last_pkt_time-obs_start_time)/3600.0;
		fprintf(file,"Threshold learning results: top %d anomaly scores over %.5f hours\n",top_anom_list_size-1,obs_hours);
		fprintf(file,"  Suggested threshold based on observation: %.6f\n",(top_anom_list->val+top_anom_list->next->val)/2);
		fprintf(file,"  Top scores: %.5f",top_anom_list->next->val);
		for (n=top_anom_list->next->next; n != NULL; n=n->next) {
			fprintf(file,",%.5f",n->val);
		}
		fprintf(file,"\n  First runner up is %.5f, so use threshold between %.5f and %.5f for %.3f packets/hr\n",top_anom_list->val,top_anom_list->val,top_anom_list->next->val,(top_anom_list_size/obs_hours));
	}

	if (print_entropy) {
		H= calc_all_entropies();
		write_all_entropies(file,H);
	}
	if (print_uncondprob) write_all_uncond_probs(file);
	if (print_condprob) write_all_cond_probs(file);
	
	if (file != stdout) {
		fclose(file);
	}
	
	if (survey_log != NULL) {
		fflush(survey_log);
	}
}


/* creation and recycling routines for ll_double's */
/* head of the list of recycled ll_double nodes available for reuse */
ll_double *free_link_list=NULL;

/* Return a node holding `val` with a NULL next pointer.  A node is taken
 * from the recycle list when one is available; otherwise a new one is
 * allocated, and running out of memory is reported through FatalError
 * rather than dereferencing a NULL pointer. */
ll_double *new_link(double val) {
	ll_double *link;
	if (free_link_list != NULL) {
		link= free_link_list;
		free_link_list= link->next;
	} else {
		link= (ll_double *)malloc(sizeof(ll_double));
		if (link == NULL) FatalError("spp_anomsensor: out of memory allocating a list link");
	}
	link->val= val;
	link->next= NULL;
	return link;
}

/* Recycle the whole chain beginning at `start` (which must not be NULL)
 * by splicing it onto the front of the free list. */
void free_links(ll_double *start) {
	ll_double *last= start;

	/* locate the final node of the chain */
	while (last->next != NULL) {
		last= last->next;
	}
	last->next= free_link_list;
	free_link_list= start;
}

/* creation and recycling routines for dll_double's */
/* head of the list of recycled dll_double nodes (chained through next) */
dll_double *free_dlink_list= NULL;

/* Return a node holding `val` with NULL prev and next pointers.  A node is
 * taken from the recycle list when one is available; otherwise a new one
 * is allocated, and running out of memory is reported through FatalError
 * rather than dereferencing a NULL pointer. */
dll_double *new_dlink(double val) {
	dll_double *link;
	if (free_dlink_list != NULL) {
		link= free_dlink_list;
		free_dlink_list= link->next;
	} else {
		link= (dll_double *)malloc(sizeof(dll_double));
		if (link == NULL) FatalError("spp_anomsensor: out of memory allocating a list link");
	}
	link->val= val;
	link->prev= NULL;
	link->next= NULL;
	return link;
}

/* Recycle the whole chain beginning at `start` (which must not be NULL)
 * by splicing it onto the front of the free list.  Only the next pointers
 * are followed and updated. */
void free_dlinks(dll_double *start) {
	dll_double *last= start;

	/* locate the final node of the chain */
	while (last->next != NULL) {
		last= last->next;
	}
	last->next= free_dlink_list;
	free_dlink_list= start;
}
/*********************************************************************
tree.c, distributed as part of Spade v092200.1
Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
copyright (c) 2000 by Silicon Defense (http://www.silicondefense.com/)
Released under GNU General Public License, see the COPYING file included
with the distribution or http://www.silicondefense.com/spice/ for details.

tree.h contains all the routines to build and maintain the tree structure
that Spade uses to maintain its probability tables.  It also contains the
access functions.

Please send complaints, kudos, and especially improvements and bugfixes to
hoagland@SiliconDefense.com.  As described in GNU General Public License, no
warranty is expressed for this program.
*********************************************************************/

#include <limits.h>
#include <math.h>

#ifndef LOG2
/*#define LOG2 log(2);*/
#define LOG2 ((double)0.693147180559945)
#endif

/* Mark the top-level tree of every feature as not yet created. */
void tree_init() {
	int feat= 0;

	while (feat < NUM_FEATURES) {
		T[feat]= TNULL;
		feat++;
	}
}

#ifndef WIN32
/* These are ordinary external functions rather than `inline` ones: under
   C99 and later, a file-scope definition marked plain `inline` (without
   `static` or `extern`) does not provide an external definition, so any
   call the compiler chooses not to inline fails to link.  Compilers still
   inline such small functions when optimizing. */

/* Return the smaller of a and b. */
int min(int a,int b) {
	return a < b ? a : b;
}

/* Return the larger of a and b. */
int max(int a,int b) {
	return a > b ? a : b;
}
#endif

/*
 * Count one more occurrence of val1 in the top-level tree for feature type1,
 * creating that tree first if the slot in T is still empty.
 */
void increment_simple_count(features type1,valtype val1) {
	mindex top= T[type1];
	if (top == TNULL) {
		top= new_treeinfo(type1);
		T[type1]= top;
	}
	incr_tree_value_count(top,val1);
}

/*
 * Count one more occurrence of the pair (type1=val1, type2=val2).
 * Assumes type1 and type2 are given in a consistent order.
 * skip >= 1 means the first level was already counted: its leaf is only
 * looked up (per the original note it is expected to exist, as is T[type1]).
 */
void increment_2joint_count(features type1,valtype val1,features type2,valtype val2,int skip) {
	mindex leaf1,tree2;

	if (skip < 1) {
		if (T[type1] == TNULL) T[type1]= new_treeinfo(type1);
		leaf1= incr_tree_value_count(T[type1],val1);
	} else {
		/* already counted earlier: just locate the leaf */
		leaf1= find_leaf(T[type1],val1);
	}

	/* second level lives in the tree of type2 hanging off the first-level leaf */
	tree2= get_nexttree_of_type(leaf1,type2);
	incr_tree_value_count(tree2,val2);
}

/*
 * Count one more occurrence of the triple (type1=val1, type2=val2, type3=val3).
 * skip is the number of leading levels that were already counted; for those
 * levels the leaf is only looked up (per the original notes it is expected
 * to exist) rather than incremented.
 */
void increment_3joint_count(features type1,valtype val1,features type2,valtype val2,features type3,valtype val3,int skip) {
	mindex leaf1,leaf2,tree2,tree3;

	/* level 1 */
	if (skip < 1) {
		if (T[type1] == TNULL) T[type1]= new_treeinfo(type1);
		leaf1= incr_tree_value_count(T[type1],val1);
	} else {
		leaf1= find_leaf(T[type1],val1);
	}

	/* level 2 */
	tree2= get_nexttree_of_type(leaf1,type2);
	if (skip >= 2) {
		leaf2= find_leaf(tree2,val2);
	} else {
		leaf2= incr_tree_value_count(tree2,val2);
	}

	/* level 3 is always counted */
	tree3= get_nexttree_of_type(leaf2,type3);
	incr_tree_value_count(tree3,val3);
}

/*
 * Count one more occurrence of the 4-tuple
 * (type1=val1, type2=val2, type3=val3, type4=val4).
 * skip is the number of leading levels that were already counted; for those
 * levels the leaf is only looked up (per the original notes it is expected
 * to exist) rather than incremented.
 */
void increment_4joint_count(features type1,valtype val1,features type2,valtype val2,features type3,valtype val3,features type4,valtype val4,int skip) {
	mindex leaf1,leaf2,leaf3,tree2,tree3,tree4;

	/* level 1 */
	if (skip < 1) {
		if (T[type1] == TNULL) T[type1]= new_treeinfo(type1);
		leaf1= incr_tree_value_count(T[type1],val1);
	} else {
		leaf1= find_leaf(T[type1],val1);
	}

	/* level 2 */
	tree2= get_nexttree_of_type(leaf1,type2);
	if (skip >= 2) {
		leaf2= find_leaf(tree2,val2);
	} else {
		leaf2= incr_tree_value_count(tree2,val2);
	}

	/* level 3 */
	tree3= get_nexttree_of_type(leaf2,type3);
	if (skip >= 3) {
		leaf3= find_leaf(tree3,val3);
	} else {
		leaf3= incr_tree_value_count(tree3,val3);
	}

	/* level 4 is always counted */
	tree4= get_nexttree_of_type(leaf3,type4);
	incr_tree_value_count(tree4,val4);
}

/*****************************************************/

/*
 * Unconditional probability of val1 in the top-level tree for feature type1.
 * If no tree exists for type1 the case is really undefined (the feature was
 * never counted); a warning goes to stderr and 0.0 is returned.
 */
double prob_simple(features type1,valtype val1) {
	if (T[type1] == TNULL) {
		/* the value is printed with %u, matching how this file prints valtype values elsewhere */
		fprintf(stderr,"Warning: attempt to find probability of value %u in an empty tree of type %d; returning 0.0\n",val1,type1);
		return 0.0; /* actually this is an undefined case; this feature was not counted */
	}
	return tree_value_prob(T[type1],val1);
}

/*
 * Probability of val within tree: the count on val's leaf divided by the
 * count/sum at the tree's root, or 0.0 when val has no leaf.
 * The caller must pass a tree that is not TNULL.
 */
double tree_value_prob(mindex tree,valtype val) {
	mindex root= treeroot(tree);
	mindex hit= find_leaf_in_subtree(root,val);
	return (hit != TNULL) ? leafcount(hit)/count_or_sum(root) : 0.0;
}

/*
 * P(type=val | ctype=cval): count of val in the tree of the given type below
 * the leaf for cval, divided by the count on that conditioning leaf.
 * Returns 0.0 when any tree or leaf along the way is missing (a missing tree
 * is really an undefined case: the feature was not counted).
 */
double prob_cond1(features type,valtype val,features ctype,valtype cval) {
	mindex condleaf,tree,leaf;

	if (T[ctype] == TNULL) return 0.0;

	condleaf= find_leaf(T[ctype],cval);
	tree= (condleaf == TNULL) ? TNULL : find_nexttree_of_type(condleaf,type);
	leaf= (tree == TNULL) ? TNULL : find_leaf(tree,val);

	if (leaf == TNULL) return 0.0;
	return leafcount(leaf)/leafcount(condleaf);
}

/*
 * P(type=val | ctype1=cval1, ctype2=cval2): count of val in the tree of the
 * given type below the two-level conditioning leaf, divided by the count on
 * that conditioning leaf.  Returns 0.0 when any tree or leaf is missing
 * (a missing tree is really an undefined case: the feature was not counted).
 */
double prob_cond2(features type,valtype val,features ctype1,valtype cval1,features ctype2,valtype cval2) {
	mindex condleaf,leaf,tree;

	condleaf= find_leaf2(ctype1,cval1,ctype2,cval2);
	tree= (condleaf == TNULL) ? TNULL : find_nexttree_of_type(condleaf,type);
	leaf= (tree == TNULL) ? TNULL : find_leaf(tree,val);

	if (leaf == TNULL) return 0.0;
	return leafcount(leaf)/leafcount(condleaf);
}

/*
 * P(type=val | ctype1=cval1, ctype2=cval2, ctype3=cval3): count of val in the
 * tree of the given type below the three-level conditioning leaf, divided by
 * the count on that conditioning leaf.  Returns 0.0 when any tree or leaf is
 * missing (a missing tree is really an undefined case: the feature was not
 * counted).
 */
double prob_cond3(features type,valtype val,features ctype1,valtype cval1,features ctype2,valtype cval2,features ctype3,valtype cval3) {
	mindex condleaf,leaf,tree;

	condleaf= find_leaf3(ctype1,cval1,ctype2,cval2,ctype3,cval3);
	tree= (condleaf == TNULL) ? TNULL : find_nexttree_of_type(condleaf,type);
	leaf= (tree == TNULL) ? TNULL : find_leaf(tree,val);

	if (leaf == TNULL) return 0.0;
	return leafcount(leaf)/leafcount(condleaf);
}


/*
 * Joint probability P(type1=val1, type2=val2): the count on the second-level
 * leaf divided by the total count of the top-level tree for type1.
 * Returns 0.0 when any tree or leaf is missing (a missing tree is really an
 * undefined case: the feature was not counted).
 */
double prob_2joint(features type1,valtype val1,features type2,valtype val2) {
	mindex tree,leaf;
	double totcount;

	if (T[type1] == TNULL) return 0.0;
	totcount= tree_count(T[type1]);

	leaf= find_leaf(T[type1],val1);
	tree= (leaf == TNULL) ? TNULL : find_nexttree_of_type(leaf,type2);
	if (tree == TNULL) return 0.0;

	leaf= find_leaf(tree,val2);
	return (leaf == TNULL) ? 0.0 : leafcount(leaf)/totcount;
}

/*
 * Joint probability of size feature/value pairs:
 * P(type[0]=val[0], ..., type[size-1]=val[size-1]).
 * The pairs are followed level by level starting at the top-level tree for
 * type[0]; the result is the count on the final leaf divided by the total
 * count of that top-level tree.
 * Returns 0.0 when size < 1, or when any tree or leaf along the way is
 * missing (a missing tree is really an undefined case: the feature was not
 * counted).
 */
double prob_Njoint(int size,features type[],valtype val[]) {
	mindex tree,leaf;
	double totcount;
	int i;
	if (size < 1) return 0.0; /* no pairs given: type[0] and val[size-1] must not be read */
	tree= T[type[0]];
	if (tree == TNULL) {
		return 0.0; /* actually this is an undefined case; this feature was not counted */
	}
	totcount= tree_count(tree);
	/* descend: leaf for val[i-1] in the current tree, then the tree of type[i] below it */
	for (i=1;i < size; i++) {
		leaf= find_leaf(tree,val[i-1]);
		if (leaf == TNULL) return 0.0;
		tree= find_nexttree_of_type(leaf,type[i]);
		if (tree == TNULL) {
			return 0.0; /* actually this is an undefined case; this feature was not counted */
		}
	}
	leaf= find_leaf(tree,val[size-1]);
	if (leaf == TNULL) return 0.0;
	return leafcount(leaf)/totcount;
}

/*****************************************************/
/*
 * Search the list of trees hanging off leaf (linked through treenext) for
 * the one whose type is the given feature.  Returns it, or TNULL if the
 * list holds no tree of that type.  Nothing is created.
 */
mindex find_nexttree_of_type(mindex leaf,features type) {
	mindex t= leafnexttree(leaf);
	while (t != TNULL) {
		if (treetype(t) == type) break;
		t= treenext(t);
	}
	return t; /* TNULL here means the list was exhausted */
}

/*
 * Like find_nexttree_of_type(), but never fails: if the list of trees
 * hanging off leaf has no tree of the given type, a new one is made with
 * new_treeinfo(type), appended at the end of the list (or installed as the
 * leaf's first tree when the list is empty), and returned.
 */
mindex get_nexttree_of_type(mindex leaf,features type) {
	mindex t,last= TNULL,made;

	/* one pass: look for a match while remembering the last list element */
	for (t= leafnexttree(leaf); t != TNULL; t= treenext(t)) {
		if (treetype(t) == type) return t;
		last= t;
	}

	made= new_treeinfo(type);
	if (last == TNULL) {
		leafnexttree(leaf)= made; /* list was empty */
	} else {
		treenext(last)= made; /* append after the final element */
	}
	return made;
}

/*
 * Record one more instance of newval in tree and return the leaf that now
 * represents it.  Three situations are handled here:
 *   - the tree has no root: a new leaf becomes the root;
 *   - the root is a single leaf: either its count is bumped, or a new
 *     interior node is created holding the old and the new leaf;
 *   - the root is an interior node: the work goes to increment_value_count().
 */
mindex incr_tree_value_count(mindex tree,valtype newval) {
	mindex root= treeroot(tree);
	mindex leaf,newleaf,node;
	valtype curval;

	if (root == TNULL) {
		newleaf= new_leaf(newval);
		treeroot(tree)= asleaf(newleaf);
		return newleaf;
	}

	if (!isleaf(root)) {
		return increment_value_count(root,newval);
	}

	leaf= encleaf2mindex(root);
	curval= leafvalue(leaf);
	if (curval == newval) {
		leafcount(leaf)++;
		return leaf;
	}

	/* two distinct values now: put both leaves under a new interior node */
	newleaf= new_leaf(newval); /* count is 1 */
	node= new_int();
	intsum(node)= leafcount(leaf)+1;

	/* smaller value goes left; the sort point is the value on the left */
	intsortpt(node)= (curval < newval) ? curval : newval;
	if (curval < newval) {
		intleft(node)= asleaf(leaf);
		intright(node)= asleaf(newleaf);
	} else {
		intleft(node)= asleaf(newleaf);
		intright(node)= asleaf(leaf);
	}

	/* no rebalancing possible now, so just set wait time to standard */
	intwait(node)= (unsigned short)wait_time(1,leafcount(leaf));
	treeroot(tree)= node;
	return newleaf;
}

/*
 * Record one more instance of val below the interior node 'node' and return
 * the leaf that represents val.
 *
 * The sum on 'node' is incremented, as are the sums/counts on the path down
 * to the leaf.  If val has no leaf yet, one is added by one of the
 * add_node_* helpers (which also adjust the sum of 'node').
 *
 * After the update the node's wait counter is decremented; when it reaches
 * zero the subtree at 'node' is rebalanced (which resets the counter).
 */
mindex increment_value_count(mindex node,valtype val) {
	mindex child,res;
	dmindex encchild;

	/* pick the side of 'node' that val belongs on */
	if (val <= intsortpt(node)) { /* going left */
		encchild= intleft(node);
	} else { /* going right */
		encchild= intright(node);
	}
	
	if (isleaf(encchild)) {
		child= encleaf2mindex(encchild);
		if (val == leafvalue(child)) { /* found the leaf */
			intsum(node)++;
			leafcount(child)++;	
			res= child;
		} else { /* need to add the leaf */
			/* the helper called here creates the leaf and increments intsum(node) itself */
			if (val > leafvalue(child)) { /* higher than right node */
				res= add_node_above_to_right(node,val);
			} else if (val <= intsortpt(node)) { /* lower than left node */
				res= add_node_above_to_left(node,val);
			} else { /* in between */
				res= add_node_between(node,val);
			}
			/* note: "node" may have different children now */
		}
	} else { /* recurse */
		child= encchild;
		intsum(node)++;
		res= increment_value_count(child,val);
	}
	
	/* one step closer to the next rebalance attempt for this node */
	intwait(node)--;
	if (intwait(node) == 0) {/*printf("** rebalancing %X since got to 0 **\n",node);*/rebalance_subtree(node);}
	
	return res;
}

/*
 * Add a new leaf for val to the right of everything currently under 'node'.
 * Conceptually a new node is placed above 'node'; since the parent is not
 * known here, 'node' itself is reused as that upper node and its old
 * contents are moved into a duplicate that becomes its left child.
 * 'node' is rebalanced afterwards.  Returns the new leaf.
 */
mindex add_node_above_to_right(mindex node,valtype val) {
	mindex added= new_leaf(val); /* count is 1 */
	mindex lower= dup_intnode(node); /* carries the old children, sum, sort point and wait */

	/* reshape 'node': old contents on the left, new leaf on the right */
	intsortpt(node)= largest_val(lower);
	intsum(node)++; /* sum on lower + count on new leaf */
	intright(node)= asleaf(added);
	intleft(node)= lower;

	rebalance_subtree(node);

	return added;
}

/*
 * Add a new leaf for val to the left of everything currently under 'node'.
 * Conceptually a new node is placed above 'node'; since the parent is not
 * known here, 'node' itself is reused as that upper node and its old
 * contents are moved into a duplicate that becomes its right child.
 * 'node' is rebalanced afterwards.  Returns the new leaf.
 */
mindex add_node_above_to_left(mindex node,valtype val) {
	mindex added= new_leaf(val); /* count is 1 */
	mindex lower= dup_intnode(node); /* carries the old children, sum, sort point and wait */

	/* reshape 'node': new leaf on the left, old contents on the right */
	intsortpt(node)= val; /* val is the largest (only) value on the left side */
	intsum(node)++; /* sum on lower + count on new leaf */
	intleft(node)= asleaf(added);
	intright(node)= lower;

	rebalance_subtree(node);

	return added;
}

/*
 * Add a new leaf for val between the left side of 'node' and its current
 * right child: a new interior node takes over the right slot of 'node',
 * with the new leaf on its left and the former right child on its right.
 * The new interior node is rebalanced afterwards.  Returns the new leaf.
 */
mindex add_node_between(mindex node,valtype val) {
	mindex added= new_leaf(val); /* count is 1 */
	mindex between= new_int();
	dmindex oldright= intright(node);

	intleft(between)= asleaf(added);
	intright(between)= oldright;
	intsortpt(between)= val; /* val is largest value on left side */
	intsum(between)= count_or_sum(oldright)+1;

	intright(node)= between;
	intsum(node)++; /* counts stayed the same except adding 1 */

	rebalance_subtree(between);

	return added;
}

/*
 * Rebalance tree starting at its root, without regard to the wait counts.
 * A tree with no root is left alone.
 */
void rebalance_tree(mindex tree) {
	mindex root= treeroot(tree);
	if (root != TNULL) {
		rebalance_subtree(root);
	}
}

/*
 * Try to rebalance the subtree rooted at the (encoded) node.
 *
 * A leaf is left untouched.  For an interior node the loop below repeats as
 * long as a pass changed something.  Each pass acts only when both children
 * exist and out_of_balance(node) holds, and then works on the heavier side
 * (the right side on a tie), provided that side is an interior node:
 *   1. if a single rotation toward the lighter side would cut the imbalance
 *      to less than 0.999 of its current amount, rotate;
 *   2. otherwise walk down the inner edge of the heavy child looking for a
 *      subtree whose weight is below the imbalance and move that subtree to
 *      the light side.
 * In both cases the interior node that was reused for the light side is
 * rebalanced recursively.
 *
 * Finally the node's wait count is reset from the weights of its children.
 * Compiling with NO_REBALANCE defined skips the loop body entirely, so only
 * the wait count is reset.
 */
void rebalance_subtree(mindex encnode) {
	mindex node,left,right;
	int changed;

	if (isleaf(encnode)) return;
	node= encnode;

	do {
#ifdef NO_REBALANCE
break;
#endif
/*printf("rebalance_subtree(%X):",encnode);printtree2_shallow(encnode);
printf("\n",encnode);*/
		changed= 0;
		left= intleft(node);
		right= intright(node);
		
		if ((left != TNULL) && (right != TNULL) && out_of_balance(node)) {
			double lct=count_or_sum(left);
			double rct=count_or_sum(right);
			mindex left2,right2,newright,newleft;
			double l2ct,r2ct,newsum;
			double unbalanced_amount;
			if (lct > rct) {
				/* left side is heavier */
				if (!isleaf(left)) {
					left2= intleft(left);
					right2= intright(left);
					l2ct= count_or_sum(left2);
					r2ct= count_or_sum(right2);
					newsum= r2ct+rct; /* sum for right interior node */
					unbalanced_amount= lct-rct;
					if (fabs(l2ct-newsum) < unbalanced_amount*0.999) { /* if improves balance */
/*printf("[rotating %X right improves balance (%f,%f) [%f]",node,l2ct,newsum,fabs(l2ct-newsum));
printf(" < (%f,%f)*0.999 [%f]\n",lct,rct,unbalanced_amount*0.999);*/
						/* rotate right */
						/* recycle "left" interior node into one for right */ 
						newright= left;
						intsortpt(newright)= largestval(right2);
						intleft(newright)= right2;
						intright(newright)= right;
						intsum(newright)= newsum;
						/* update "node" */
						intsortpt(node)= largestval(left2);
						intleft(node)= left2;
						intright(node)= newright;
						/* count stays the same */
						rebalance_subtree(newright);
						changed= 1;
					} else {
						mindex pprl,prl,rl,lrl,n;
						double rlct;
						valtype prl_largest;
						/*printf("rotating %X right would not improve balance (%.2f,%.2f) vs (%.2f,%.2f)\n",node,l2ct,newsum,lct,rct);*/
						/* walk down the right edge of the "right2" tree to the first interior node (prl) whose right child weighs less than unbalanced_amount (if any); pprl is the parent of prl */
						for (pprl= left,prl= right2; !isleaf(prl) && count_or_sum(intright(prl)) >= unbalanced_amount; pprl= prl,prl=intright(prl));
						if (!isleaf(prl)) {
							rl= intright(prl);
							/*printf("  so moving right of %X (%X) to left side\n",prl,rl);*/
							prl_largest= intsortpt(prl);
							rlct= count_or_sum(rl);
							lrl= intleft(prl);
							
							/* intsortpt(pprl) remains same */
							intright(pprl)= lrl;
							/* rl is now out of right of tree and prl can be recycled */
							/* use prl for node on right of "node", containing rl and "right" */
							newright= prl;
							intsortpt(newright)= largestval(rl);
							intleft(newright)= rl;
							intright(newright)= right;
							intsum(newright)= rct+count_or_sum(rl);
							intright(node)= newright;
							intsortpt(node)= prl_largest;
							/* update sums from left to pprl (inclusive) to reflect loss (rlct) of the node rl */
							for (n=left; 1; n=intright(n)) {
								intsum(n)-= rlct;
								if (n == pprl) break;
							}
							rebalance_subtree(newright);
							changed= 1;
						} /*else { printf("  and hit leaf (%X) before hitting node smaller than %.2f\n",encleaf2mindex(prl),unbalanced_amount);}*/
					}
				} /*else {printf("%X cannot be rotated right since left is a leaf node\n",node);}*/
			} else {
				/* right side is at least as heavy as the left */
				if (!isleaf(right)) {
					left2= intleft(right);
					right2= intright(right);
					l2ct= count_or_sum(left2);
					r2ct= count_or_sum(right2);
					newsum= lct+l2ct; /* sum for left interior node */
					unbalanced_amount= rct-lct;
					if (fabs(r2ct-newsum) < unbalanced_amount*0.999) { /* if improves balance */
/*printf("[rotating %X left improves balance (%f,%f) [%f]",node,r2ct,newsum,fabs(r2ct-newsum));
printf(" < (%f,%f)*0.999 [%f]]\n",rct,lct,unbalanced_amount*0.999);*/
						/* rotate left */
						/* transform "right" interior node into one for left */
						newleft= right;
						intsortpt(newleft)= largestval(left);
						intleft(newleft)= left;
						intright(newleft)= left2;
						intsum(newleft)= newsum;
						/* update "node" */
						intsortpt(node)= largestval(left2);
						intleft(node)= newleft;
						intright(node)= right2;
						/* count stays the same */
						rebalance_subtree(newleft);
						changed= 1;
					} else {
						mindex pplr,plr,lr,rlr,n;
						double lrct;
						valtype lr_largest;
						/*printf("rotating %X left would not improve balance (%.2f,%.2f) vs (%.2f,%.2f)\n",node,r2ct,newsum,rct,lct);*/
						/* walk down the left edge of the "left2" tree to the first interior node (plr) whose left child weighs less than unbalanced_amount (if any); pplr is the parent of plr */
						for (pplr= right,plr= left2; !isleaf(plr) && count_or_sum(intleft(plr)) >= unbalanced_amount; pplr= plr,plr=intleft(plr));
						if (!isleaf(plr)) {
							lr= intleft(plr);
							/*printf("  so moving left of %X (%X) to right side\n",plr,lr);*/
							lr_largest= intsortpt(plr);
							lrct= count_or_sum(lr);
							rlr= intright(plr);
							
							intsortpt(pplr)= largestval(rlr);
							intleft(pplr)= rlr;
							/* lr is now detached from the right side and plr can be recycled */
							/* use plr for node on left of node, containing left and lr */
							newleft= plr;
							intsortpt(newleft)= largestval(left);
							intleft(newleft)= left;
							intright(newleft)= lr;
							intsum(newleft)= lct+count_or_sum(lr);
							intleft(node)= newleft;
							intsortpt(node)= lr_largest;
							/* update sums from right to pplr (inclusive) to reflect loss (lrct) of the node lr */
							for (n=right; 1; n=intleft(n)) {
								intsum(n)-= lrct;
								if (n == pplr) break;
							}
							rebalance_subtree(newleft);
							changed= 1;
						}/* else { printf("  and hit leaf (%X) before hitting node smaller than %.2f\n",encleaf2mindex(plr),unbalanced_amount);}*/
					}
				}/* else {printf("%X cannot be rotated left since right is a leaf node\n",node);}*/
			}
		}/* else {printf("%X is not out of balance\n",node);}*/
	} while (changed);
	
	/* note: right and left of node may have changed */

	/* reset the wait count */
	intwait(node)= (unsigned short)wait_time(count_or_sum(intleft(node)),count_or_sum(intright(node)));
}

/*
 * Decide whether the interior node is out of balance: true (1) when the
 * weights of its two children differ by more than 1 and the heavier one
 * weighs at least twice as much as the lighter one; otherwise 0.
 */
int out_of_balance(mindex node) {
	double lct= count_or_sum(intleft(node));
	double rct= count_or_sum(intright(node));
	double ratio;

	if (!(fabs(lct-rct) > 1)) return 0;

	ratio= (rct > lct) ? rct/lct : lct/rct; /* heavier over lighter */
	return ratio >= 2.0;
}

/*
 * Release everything belonging to tree: all nodes below its root (if it has
 * one) and then the tree info record itself.  TNULL is ignored.
 */
void free_all_in_tree(mindex tree) {
	if (tree == TNULL) return;

	if (treeroot(tree) != TNULL) {
		free_all_in_subtree(treeroot(tree));
	}
	free_treeinfo(tree);
}

/*
 * Release the (encoded) node and everything below it.
 * For a leaf, every tree in the list hanging off the leaf is released first,
 * then the leaf.  For an interior node, both existing children are released
 * first, then the node.
 */
void free_all_in_subtree(dmindex encnode) {
	mindex node,t,next;

	if (!isleaf(encnode)) {
		node= encnode;
		if (intleft(node) != TNULL) free_all_in_subtree(intleft(node));
		if (intright(node) != TNULL) free_all_in_subtree(intright(node));
		free_int(node);
		return;
	}

	node= encleaf2mindex(encnode);
	t= leafnexttree(node);
	while (t != TNULL) {
		next= treenext(t); /* fetch the link before the tree is released */
		free_all_in_tree(t);
		t= next;
	}
	free_leaf(node);
}

/*
 * Apply scale_and_prune_tree() with the given factor and threshold to every
 * top-level feature tree that exists in T.
 */
void scale_and_prune_all_trees(double factor,double threshold) {
	int feat= 0;
	while (feat < NUM_FEATURES) {
		if (T[feat] != TNULL) {
			scale_and_prune_tree(T[feat],factor,threshold);
		}
		feat++;
	}
}

/*
 * Scale and prune one tree: if it has a root, the root is replaced by
 * whatever scale_and_prune_subtree() returns for it (possibly TNULL).
 * The change/rightmost values reported by that call are not used here.
 */
void scale_and_prune_tree(mindex tree,double factor,double threshold) {
	double change;
	valtype newrightmost;
	dmindex root= treeroot(tree);

	if (root == TNULL) return;

	root= scale_and_prune_subtree(root,factor,threshold,&change,&newrightmost);
	treeroot(tree)= root;
}

/*
 * Multiply the count/sum of the (encoded) node by factor, delete it if the
 * result drops below threshold, and otherwise do the same for everything
 * below it.
 *
 * Returns what should take the node's place in its parent: the node itself,
 * TNULL if it was deleted, or its one surviving child if the other child
 * disappeared (the interior node is then freed).
 *
 * Outputs:
 *   *change        weight that was removed at or below this node (0.0 for a
 *                  surviving leaf); the caller subtracts it from its own sum
 *   *newrightmost  new largest value of the returned subtree when this call
 *                  knows it, else NOT_A_SORTPT
 */
dmindex scale_and_prune_subtree(dmindex encnode,double factor,double threshold,double *change,valtype *newrightmost) {
	mindex node,t;
	int a_leaf= isleaf(encnode);

	/* scale ourselves */
	if (a_leaf) {
		node= encleaf2mindex(encnode);
		leafcount(node)*= factor;
	} else {
		node= encnode;
		intsum(node)*= factor;
	}
	
	/* if we get too small, delete us and return TNULL and how much weight we had */
	if (count_or_sum(encnode) < threshold) {
/*printf("Deleting %X:\n",encnode);printtree2(encnode,"");printf("\n");*/
		*change= count_or_sum(encnode);
		*newrightmost= NOT_A_SORTPT; /* we don't have the info */
		free_all_in_subtree(encnode);
		return TNULL;
	}
	
	/* scale below us and react to reported changes */
	if (a_leaf) {
		*change= 0.0;
		*newrightmost= NOT_A_SORTPT;
				
		/* the trees hanging off a leaf are scaled and pruned independently */
		for (t=leafnexttree(node); t != TNULL; t=treenext(t)) {
			scale_and_prune_tree(t,factor,threshold);
		}
	} else {
		dmindex left,right;
		double mychange,reduced=0.0; /* reduced: total weight removed below this node */
		left= intleft(node);
		right= intright(node);
		
		if (left != TNULL) {
			valtype leftnewrightmost; /* we want this to update our sortpt, if there is a change, and the rightmost has changed */
			intleft(node)= scale_and_prune_subtree(left,factor,threshold,change,&leftnewrightmost);
			if (*change > 0.0) {
				reduced+= *change;
				if (leftnewrightmost != NOT_A_SORTPT) intsortpt(node)= leftnewrightmost; /* there is a new rightmost on left side */
			}
		}
		if (right != TNULL) {
			/* the right side's rightmost is also this node's rightmost, so it is passed straight through */
			intright(node)= scale_and_prune_subtree(right,factor,threshold,&mychange,newrightmost);
			if (mychange > 0.0) {
				reduced+= mychange;
			}
		} else {
			*newrightmost= NOT_A_SORTPT;
		}
		*change= reduced;
		
		/* re-read the children: either may have been replaced or removed */
		left= intleft(node);
		right= intright(node);
		if (right == TNULL && left != TNULL) *newrightmost= largestval(left);
		
		if (left == TNULL || right == TNULL) { /* at least one child is gone, so delete self and return whats left */
			free_int(node);
			if (left == TNULL) {
				return right; /* might be TNULL */
			} else {
				return left;
			}
		} else { /* nothing deleted at this level */
			intsum(node)-= reduced;
		}
	}
	return encnode;
}


/*
 * Largest value stored below the interior node: follow right children until
 * a leaf is reached and return that leaf's value.
 * Note: sometimes called from the macro function largestval(node).
 */
valtype largest_val(mindex node) {
	mindex cur= node;
	dmindex encright;

	for (;;) {
		encright= intright(cur);
		if (isleaf(encright)) {
			return eleafval(encright);
		}
		cur= encright; /* keep descending along the right edge */
	}
}

/*
 * Allocate a new interior node with new_int() and copy the children, sum,
 * sort point and wait count of the given interior node into it.
 * Returns the copy.
 */
mindex dup_intnode(mindex node) {
	mindex copy= new_int();

	intleft(copy)= intleft(node);
	intright(copy)= intright(node);
	intsum(copy)= intsum(node);
	intsortpt(copy)= intsortpt(node);
	intwait(copy)= intwait(node);

	return copy;
}


/*
 * Look up the leaf that represents val in tree.
 * Returns TNULL when the tree has no root or holds no leaf for val.
 */
mindex find_leaf(mindex tree,valtype val) {
	mindex root= treeroot(tree);
	return (root == TNULL) ? TNULL : find_leaf_in_subtree(root,val);
}


/*
 * Look up the leaf that represents val at or below the given interior or
 * leaf [encoded] node.  At each interior node the search goes left when
 * val <= the node's sort point and right otherwise.
 * Returns TNULL when a missing child or a leaf with a different value is
 * reached.
 */
mindex find_leaf_in_subtree(dmindex encchild,valtype val) {
	mindex child;
	dmindex cur= encchild;

	while (cur != TNULL) {
		if (isleaf(cur)) {
			child= encleaf2mindex(cur);
			return (val == leafvalue(child)) ? child : TNULL;
		}
		child= cur;
		if (val <= intsortpt(child)) {
			cur= intleft(child);
		} else {
			cur= intright(child);
		}
	}
	return TNULL;
}

/*
 * Two-level lookup: the leaf for val2 in the tree of type2 that hangs off
 * the leaf for val1 in the top-level tree of type1.
 * Returns TNULL if any tree or leaf on that path is missing (a missing tree
 * is really an undefined case: the feature was not counted).
 */
mindex find_leaf2(features type1,valtype val1,features type2,valtype val2) {
	mindex leaf,tree;

	if (T[type1] == TNULL) return TNULL;

	leaf= find_leaf(T[type1],val1);
	tree= (leaf == TNULL) ? TNULL : find_nexttree_of_type(leaf,type2);

	return (tree == TNULL) ? TNULL : find_leaf(tree,val2);
}

/*
 * Three-level lookup: the leaf for val3 in the tree of type3 below the leaf
 * for val2 in the tree of type2 below the leaf for val1 in the top-level
 * tree of type1.
 * Returns TNULL if any tree or leaf on that path is missing (a missing tree
 * is really an undefined case: the feature was not counted).
 */
mindex find_leaf3(features type1,valtype val1,features type2,valtype val2,features type3,valtype val3) {
	mindex leaf,tree;

	/* the first two levels are exactly the two-level lookup */
	leaf= find_leaf2(type1,val1,type2,val2);
	if (leaf == TNULL) return TNULL;

	tree= find_nexttree_of_type(leaf,type3);
	if (tree == TNULL) return TNULL;

	return find_leaf(tree,val3);
}


/*****************************************************/

/*
 * Gather statistics over all trees of feature f reachable from the
 * top-level trees in T, averaged per such tree.
 * Outputs (written only when at least one tree was counted):
 *   *amind, *amaxd   average of the trees' min and max depths
 *   *aaved, *awaved  average of the trees' average and weighted average depths
 * Returns the average number of leaves per tree, or 0 when no tree of this
 * type was counted.
 */
float feature_trees_stats(features f,float *amind,float *amaxd,float *aaved,float *awaved) {
	unsigned int leaves_total=0,mind_total=0,maxd_total=0,ntrees=0;
	float aved_total=0.0,waved_total=0.0;
	unsigned int part_leaves,part_mind,part_maxd;
	float part_aved,part_waved;
	float per_tree;
	int i;

	for (i=0; i < NUM_FEATURES; i++) {
		if (T[i] == TNULL) continue;
		ntrees+= feature_tree_stats(T[i],f,&part_mind,&part_maxd,&part_aved,&part_waved,&part_leaves);
		leaves_total+= part_leaves;
		mind_total+= part_mind;
		maxd_total+= part_maxd;
		aved_total+= part_aved;
		waved_total+= part_waved;
	}

	if (ntrees == 0) return 0; /* no non-empty trees of this type */

	per_tree= (float)ntrees;
	*amind= mind_total/per_tree;
	*amaxd= maxd_total/per_tree;
	*aaved= aved_total/per_tree;
	*awaved= waved_total/per_tree;
	return leaves_total/per_tree;
}

/*
 * Sum the statistics of every tree of feature f found at or below tree
 * (tree itself included when its type is f).
 * Outputs are sums over the counted trees: *smind/*smaxd (min/max depths),
 * *saved/*swaved (average and weighted average depths), *snum_leaves.
 * Returns the number of trees counted.
 *
 * The outputs are always defined: they are zeroed up front, so a tree
 * without a root reports all-zero sums and 0 trees.  Callers add these
 * outputs into their totals unconditionally.
 */
unsigned int feature_tree_stats(mindex tree,features f,unsigned int *smind,unsigned int *smaxd,float *saved,float *swaved,unsigned int *snum_leaves) {
	unsigned int tree_count= 0;
	dmindex root= treeroot(tree);

	*smind= *smaxd= *snum_leaves= 0;
	*saved= *swaved= 0.0;

	if (root != TNULL) {
		tree_count= feature_subtree_stats(root,f,smind,smaxd,saved,swaved,snum_leaves);
		if (treetype(tree) == f) {
			/* gather stats from this tree */
			unsigned int mind,maxd;
			float aved,waved;
			*snum_leaves+= tree_stats(tree,&mind,&maxd,&aved,&waved);
			*smind+= mind;
			*smaxd+= maxd;
			*saved+= aved;
			*swaved+= waved;
			tree_count++;
		}
	}
	return tree_count;
}

/*
 * Sum the statistics of every tree of feature f reachable from the (encoded)
 * node: for a leaf, the trees in the list hanging off it; for an interior
 * node, whatever its existing left and right children report.
 * The outputs are zeroed first and then hold the accumulated sums.
 * Returns the number of trees counted.
 */
unsigned int feature_subtree_stats(mindex encnode,features f,unsigned int *smind,unsigned int *smaxd,float *saved,float *swaved,unsigned int *snum_leaves) {
	mindex node,t;
	unsigned int part_mind,part_maxd,part_leaves;
	float part_aved,part_waved;
	unsigned int found= 0;

	*smind= *smaxd= *snum_leaves= 0;
	*saved= *swaved= 0.0;

	if (isleaf(encnode)) {
		node= encleaf2mindex(encnode);

		for (t=leafnexttree(node); t != TNULL; t=treenext(t)) {
			found+= feature_tree_stats(t,f,&part_mind,&part_maxd,&part_aved,&part_waved,&part_leaves);
			*smind+= part_mind;
			*smaxd+= part_maxd;
			*saved+= part_aved;
			*swaved+= part_waved;
			*snum_leaves+= part_leaves;
		}
	} else {
		dmindex kids[2];
		int k;

		node= encnode;
		kids[0]= intleft(node);
		kids[1]= intright(node);

		/* left child first, then right */
		for (k=0; k < 2; k++) {
			if (kids[k] == TNULL) continue;
			found+= feature_subtree_stats(kids[k],f,&part_mind,&part_maxd,&part_aved,&part_waved,&part_leaves);
			*smind+= part_mind;
			*smaxd+= part_maxd;
			*saved+= part_aved;
			*swaved+= part_waved;
			*snum_leaves+= part_leaves;
		}
	}
	return found;
}

/*
 * Compute depth statistics for one tree.
 * Outputs:
 *   *mind, *maxd  minimum and maximum leaf depth (from tree_min_max_depth)
 *   *aved         total leaf depth divided by the number of leaves
 *   *waved        count-weighted total leaf depth divided by the tree's count
 * Returns the number of leaves.
 * When the tree has no leaves (or a zero total count) the corresponding
 * average is reported as 0 instead of dividing by zero.
 */
unsigned int tree_stats(mindex tree,unsigned int *mind,unsigned int *maxd,float *aved,float *waved) {
	unsigned int num_leafs= num_leaves(tree);
	unsigned int tot= tree_depth_total(tree);
	double wtot= weighted_tree_depth_total(tree);
	double total= tree_count(tree);
	tree_min_max_depth(tree,mind,maxd);
	*aved= (num_leafs != 0) ? (float)tot/num_leafs : 0.0f;
	*waved= (total != 0.0) ? (float)(wtot/total) : 0.0f;
/*printf("tree_stats results for tree %X: min depth=%u; max depth=%u; ave depth=%.2f; w. ave depth=%.2f; # vals repr=%u\n",tree,*mind,*maxd,*aved,*waved,num_leafs);*/
	return num_leafs;
}

/*
 * Total count held by tree: the count/sum at its root, or 0.0 when the tree
 * has no root.
 */
double tree_count(mindex tree) {
	mindex root= treeroot(tree);
	if (root != TNULL) {
		return count_or_sum(root);
	}
	return 0.0;
}

/* Number of leaves in tree; 0 when the tree has no root. */
unsigned int num_leaves(mindex tree) {
	mindex root= treeroot(tree);
	if (root != TNULL) {
		return num_subtree_leaves(root);
	}
	return 0;
}

/*
 * Number of leaves at or below the (encoded) node: 1 for a leaf, otherwise
 * the sum over the existing left and right children.
 */
unsigned int num_subtree_leaves(mindex encnode) {
	int total= 0;

	if (isleaf(encnode)) return 1;

	if (intleft(encnode) != TNULL) {
		total+= num_subtree_leaves(intleft(encnode));
	}
	if (intright(encnode) != TNULL) {
		total+= num_subtree_leaves(intright(encnode));
	}
	return total;
}

/*
 * Sum of the depths of all leaves in tree (the root is at depth 1);
 * 0 when the tree has no root.
 */
unsigned int tree_depth_total(mindex tree) {
	mindex root= treeroot(tree);
	if (root != TNULL) {
		return subtree_depth_total(root,0);
	}
	return 0;
}

/*
 * Sum of the depths of all leaves at or below the (encoded) node.
 * depth is the depth of the node's parent; the node itself is at depth+1.
 */
unsigned int subtree_depth_total(mindex encnode,unsigned int depth) {
	unsigned int here= depth+1;
	int total= 0;

	if (isleaf(encnode)) return here;

	if (intleft(encnode) != TNULL) {
		total+= subtree_depth_total(intleft(encnode),here);
	}
	if (intright(encnode) != TNULL) {
		total+= subtree_depth_total(intright(encnode),here);
	}
	return total;
}

/*
 * Sum over all leaves in tree of (leaf depth * leaf count), with the root at
 * depth 1; 0 when the tree has no root.
 */
double weighted_tree_depth_total(mindex tree) {
	mindex root= treeroot(tree);
	if (root != TNULL) {
		return weighted_subtree_depth_total(root,0);
	}
	return 0;
}

/*
 * Sum over all leaves at or below the (encoded) node of
 * (leaf depth * leaf count).
 * depth is the depth of the node's parent; the node itself is at depth+1.
 */
double weighted_subtree_depth_total(mindex encnode,unsigned int depth) {
	unsigned int here= depth+1;
	double total= 0;

	if (isleaf(encnode)) {
		return here*leafnode(encleaf2mindex(encnode)).count;
	}

	if (intleft(encnode) != TNULL) {
		total+= weighted_subtree_depth_total(intleft(encnode),here);
	}
	if (intright(encnode) != TNULL) {
		total+= weighted_subtree_depth_total(intright(encnode),here);
	}
	return total;
}


/*
 * Find the smallest and largest leaf depth in tree (root at depth 1) and
 * store them in *mind and *maxd.  Both are 0 when the tree has no root.
 */
void tree_min_max_depth(mindex tree,unsigned int *mind,unsigned int *maxd) {
	mindex root= treeroot(tree);

	if (root == TNULL) {
		*maxd= 0;
		*mind= 0;
		return;
	}

	/* start from the extremes and let the walk narrow them */
	*mind= MAX_U32; /* this is the num of leaf vals, so a safe min */
	*maxd= 0;
	subtree_min_max_depth(root,mind,maxd,0);
}

/*
 * Walk the subtree at the (encoded) node and tighten *mind / *maxd with the
 * depth of every leaf found.  depth is the depth of the node's parent; the
 * node itself is at depth+1.
 */
void subtree_min_max_depth(mindex encnode,unsigned int *mind,unsigned int *maxd,unsigned int depth) {
	unsigned int here= depth+1;

	if (!isleaf(encnode)) {
		if (intleft(encnode) != TNULL) {
			subtree_min_max_depth(intleft(encnode),mind,maxd,here);
		}
		if (intright(encnode) != TNULL) {
			subtree_min_max_depth(intright(encnode),mind,maxd,here);
		}
		return;
	}

	if (here < *mind) *mind= here;
	if (here > *maxd) *maxd= here;
}

/*****************************************************/
/*****************************************************/

/*
 * Print to f the first depth entries of feats[]/vals[] in the form
 * <featurename>=<value>, separated by commas.  Nothing is printed when
 * depth is 0.
 * Every value is printed with %u, so the second and later values are
 * formatted the same way as the first.
 */
void write_feat_val_list(FILE *f,int depth,features feats[],valtype vals[]) {
	int i;
	if (depth == 0) return;
	fprintf(f,"%s=%u",featurename[feats[0]],vals[0]);
	for (i=1; i < depth; i++) {
		fprintf(f,",%s=%u",featurename[feats[i]],vals[i]);
	}
}

/*
 * Write a display of all unconditional probabilities to the given FILE:
 * every top-level tree in T is printed with its own total count as the
 * denominator.
 */
void write_all_uncond_probs(FILE *f) {
	int i;
	features feats[NUM_FEATURES];
	valtype vals[NUM_FEATURES];
	for (i=0; i < NUM_FEATURES; i++) {
		/* tree_count() gives 0.0 for a tree without a root rather than evaluating count_or_sum() on TNULL */
		if (T[i] != TNULL) write_all_tree_uncond_probs(f,T[i],0,feats,vals,tree_count(T[i]));
	}
}

/*
 * Write a display of all unconditional probabilities rooted at this tree to
 * the given FILE.  depth is the last depth completed; the tree's feature
 * type is recorded in feats[depth] and the walk continues one level deeper.
 * A tree without a root prints nothing.
 */
void write_all_tree_uncond_probs(FILE *f,mindex tree,int depth,features feats[],valtype vals[],double treesum) {
	dmindex root= treeroot(tree);
	if (root != TNULL) {
		feats[depth]= treetype(tree);
		write_all_subtree_uncond_probs(f,root,depth+1,feats,vals,treesum);
	}
}

/*
 * Write a display of all unconditional probabilities at or below this
 * interior or leaf node (as encoded) to the given FILE.  depth is the depth
 * we are at.  For a leaf, one line "P(<feature=value list>)= <count/treesum>"
 * is written and then every tree hanging off the leaf is written too;
 * for an interior node the existing left and right children are visited.
 */
void write_all_subtree_uncond_probs(FILE *f,dmindex encnode,int depth,features feats[],valtype vals[],double treesum) {
	mindex node,t;

	if (!isleaf(encnode)) {
		node= encnode;
		if (intleft(node) != TNULL) {
			write_all_subtree_uncond_probs(f,intleft(node),depth,feats,vals,treesum);
		}
		if (intright(node) != TNULL) {
			write_all_subtree_uncond_probs(f,intright(node),depth,feats,vals,treesum);
		}
		return;
	}

	node= encleaf2mindex(encnode);
	vals[depth-1]= leafvalue(node);

	fprintf(f,"P(");
	write_feat_val_list(f,depth,feats,vals);
	fprintf(f,")= %.12f\n",leafcount(node)/treesum);

	t= leafnexttree(node);
	while (t != TNULL) {
		write_all_tree_uncond_probs(f,t,depth,feats,vals,treesum);
		t= treenext(t);
	}
}

/*****************************************************/

/*
 * Write a display of all conditional probabilities to the given FILE by
 * walking every top-level tree that exists in T.
 */
void write_all_cond_probs(FILE *f) {
	features feats[NUM_FEATURES];
	valtype vals[NUM_FEATURES];
	int i= 0;

	while (i < NUM_FEATURES) {
		if (T[i] != TNULL) {
			write_all_tree_cond_probs(f,T[i],0,feats,vals);
		}
		i++;
	}
}

/*
 * Write a display of all conditional probabilities rooted at this tree to
 * the given FILE.  depth is the last depth completed; the tree's feature
 * type is recorded in feats[depth] and the walk continues one level deeper
 * with the count/sum at the tree's root as the denominator.
 * A tree without a root prints nothing.
 */
void write_all_tree_cond_probs(FILE *f,mindex tree,int depth,features feats[],valtype vals[]) {
	dmindex root= treeroot(tree);
	if (root != TNULL) {
		feats[depth]= treetype(tree);
		write_all_subtree_cond_probs(f,root,depth+1,feats,vals,count_or_sum(root));
	}
}

/*
 * Write a display of all conditional probabilities at or below this interior
 * or leaf node (as encoded) to the given FILE.  depth is the depth we are
 * at.  For a leaf deeper than the first level, one line
 * "P(<feature>=<value>|<conditioning list>)= <count/treesum>" is written;
 * in every case the trees hanging off the leaf are written too.  For an
 * interior node the existing left and right children are visited.
 */
void write_all_subtree_cond_probs(FILE *f,dmindex encnode,int depth,features feats[],valtype vals[],double treesum) {
	mindex node,t;

	if (!isleaf(encnode)) {
		node= encnode;
		if (intleft(node) != TNULL) {
			write_all_subtree_cond_probs(f,intleft(node),depth,feats,vals,treesum);
		}
		if (intright(node) != TNULL) {
			write_all_subtree_cond_probs(f,intright(node),depth,feats,vals,treesum);
		}
		return;
	}

	node= encleaf2mindex(encnode);
	vals[depth-1]= leafvalue(node);

	/* a first-level leaf has nothing to be conditioned on, so it gets no line */
	if (depth > 1) {
		fprintf(f,"P(%s=%u|",featurename[feats[depth-1]],vals[depth-1]);
		write_feat_val_list(f,depth-1,feats,vals);
		fprintf(f,")= %.12f\n",leafcount(node)/treesum);
	}

	t= leafnexttree(node);
	while (t != TNULL) {
		write_all_tree_cond_probs(f,t,depth,feats,vals);
		t= treenext(t);
	}
}

/*****************************************************/

/* store val in the featurecomb rooted at C: follow next[] through feats[0..depth-2], then overwrite the val[] slot indexed by feats[depth-1] */
void write_featurecomb(featcomb C,double val,int depth,features feats[]) {
	featcomb cur= C;
	int level= 0;

	while (level < (depth-1)) {
		cur= cur->next[feats[level]];
		level++;
	}
	cur->val[feats[depth-1]]= val;
}

/* add val to the featurecomb rooted at C: follow next[] through feats[0..depth-2], then increment the val[] slot indexed by feats[depth-1] */
void inc_featurecomb(featcomb C,double val,int depth,features feats[]) {
	featcomb cur= C;
	int level= 0;

	while (level < (depth-1)) {
		cur= cur->next[feats[level]];
		level++;
	}
	cur->val[feats[depth-1]]+= val;
}

/* allocate a featurecomb node with every val[] entry set to val; when depth > 1 each next[] entry gets its own recursively created node of depth-1, otherwise next[] is all NULL; exits the program if memory cannot be allocated */
featcomb create_featurecomb(int depth,double val) {
	int i;
	featcomb root= (featcomb)malloc(sizeof(struct _featcomb));
	if (root == NULL) {
		/* same policy as the tree node allocators in this file: report and give up */
		fprintf(stderr,"Out of memory! in allocation of new featurecomb node; exiting\n");
		exit(2);
	}
	for (i=0; i < NUM_FEATURES; i++) {
		root->val[i]= val;
		if (depth > 1) {
			root->next[i]= create_featurecomb(depth-1,val);
		} else {
			root->next[i]= NULL;
		}
	}
	return root;
}

/* multiply every val[] entry in the featurecomb c, and in every node reachable through its non-NULL next[] entries, by factor */
void scale_all_featurecomb(featcomb c,double factor) {
	int slot= 0;

	while (slot < NUM_FEATURES) {
		c->val[slot]*= factor;
		if (c->next[slot] != NULL) {
			scale_all_featurecomb(c->next[slot],factor);
		}
		slot++;
	}
}


/* build and return a zero-initialized featurecomb of depth NUM_FEATURES and accumulate into it the entropy terms of every tree present in T[]; each top-level tree is weighted by its own tree_count() */
featcomb calc_all_entropies() {
	features path[NUM_FEATURES];
	featcomb acc;
	int idx= 0;

	acc= create_featurecomb(NUM_FEATURES,0.0);
	while (idx < NUM_FEATURES) {
		if (T[idx] != TNULL) {
			add_all_tree_entrsum(acc,T[idx],0,path,tree_count(T[idx]));
		}
		idx++;
	}
	return acc;
}

/* add to the featurecomb c the entropy terms of every leaf in this tree; depth is the number of feats[] entries already filled in and totsum is the overall count used for the joint probability */
void add_all_tree_entrsum(featcomb c,mindex tree,int depth,features feats[],double totsum) {
	dmindex top= treeroot(tree);

	if (top != TNULL) {
		feats[depth]= treetype(tree);
		/* the root's count/sum is the denominator for this tree's conditional probabilities */
		add_all_subtree_entrsum(c,top,depth+1,feats,count_or_sum(top),totsum);
	}
}

/* add to the featurecomb c the entropy terms of every leaf at or below this (encoded) node; treesum is the count of the enclosing tree and totsum the overall count */
void add_all_subtree_entrsum(featcomb c,dmindex encnode,int depth,features feats[],double treesum,double totsum) {
	mindex cur,sub;
	double jointp,logarg,term;

	if (!(isleaf(encnode))) {
		/* interior node: visit whichever children exist, left first */
		cur= encnode;
		if (intleft(cur) != TNULL) {
			add_all_subtree_entrsum(c,intleft(cur),depth,feats,treesum,totsum);
		}
		if (intright(cur) != TNULL) {
			add_all_subtree_entrsum(c,intright(cur),depth,feats,treesum,totsum);
		}
		return;
	}

	cur= encleaf2mindex(encnode);
	jointp= leafcount(cur)/totsum;
	/* below the first level the log is taken of the probability relative to the enclosing tree; at the first level it is taken of the joint probability itself */
	if (depth > 1) {
		logarg= leafcount(cur)/treesum;
	} else {
		logarg= jointp;
	}
	term= -1*jointp*(log(logarg)/LOG2);
	inc_featurecomb(c,term,depth,feats);

	/* continue into each tree chained off this leaf */
	sub= leafnexttree(cur);
	while (sub != TNULL) {
		add_all_tree_entrsum(c,sub,depth,feats,totsum);
		sub= treenext(sub);
	}
}

/* write to the given FILE every positive entropy held in the featurecomb c: first the H(feature) values of the top level, then the conditional entropies of each nested level */
void write_all_entropies(FILE *f,featcomb c) {
	features path[NUM_FEATURES];
	int k;

	k= 0;
	while (k < NUM_FEATURES) {
		if (c->val[k] > 0) {
			fprintf(f,"H(%s)=%.8f\n",featurename[k],c->val[k]);
		}
		k++;
	}

	for (k=0; k < NUM_FEATURES; k++) {
		if (c->next[k] == NULL) continue;
		/* feature k becomes the first conditioning feature for the level below */
		path[0]= k;
		write_all_entropies2(f,c->next[k],1,path);
	}
}

/* write to the given FILE the positive conditional entropies held in the featurecomb level c, conditioned on the depth features already recorded in feats[], then recurse into each deeper level */
void write_all_entropies2(FILE *f,featcomb c,int depth,features feats[]) {
	int k;

	k= 0;
	while (k < NUM_FEATURES) {
		if (c->val[k] > 0) {
			fprintf(f,"H(%s|",featurename[k]);
			write_feature_names(f,depth,feats);
			fprintf(f,")=%.8f\n",c->val[k]);
		}
		k++;
	}

	for (k=0; k < NUM_FEATURES; k++) {
		if (c->next[k] == NULL) continue;
		/* feature k extends the conditioning list for the level below */
		feats[depth]= k;
		write_all_entropies2(f,c->next[k],depth+1,feats);
	}
}

/* print to the given FILE the names of feats[0..depth-1] separated by commas; nothing is printed when depth is 0 */
void write_feature_names(FILE *f,int depth,features feats[]) {
	int pos= 1;

	if (depth == 0) {
		return;
	}
	/* first name goes out bare, every later one is preceded by a comma */
	fprintf(f,"%s",featurename[feats[0]]);
	while (pos < depth) {
		fprintf(f,",%s",featurename[feats[pos]]);
		pos++;
	}
}

/*****************************************************/

/* print to stdout every tree chain present in T[], starting with no indentation */
void print_all_trees() {
	int idx= 0;

	while (idx < NUM_FEATURES) {
		if (T[idx] != TNULL) {
			printtree(T[idx],"");
		}
		idx++;
	}
}

/* print to stdout the given tree and every tree that follows it via treenext, one per line, each line prefixed with ind */
void printtree(mindex tree,char *ind) {
	mindex cur= tree;

	while (cur != TNULL) {
		printf("%sTree %X of %s: ",ind,cur,featurename[treetype(cur)]);
		printtree2(treeroot(cur),ind);
		printf("\n");
		cur= treenext(cur);
	}
}

/* print to stdout the (encoded) node and everything below it; trees hanging off a leaf are printed on their own lines, indented four spaces further than ind */
void printtree2(dmindex encnode,char *ind) {
	mindex node;
	char deeper[4*NUM_FEATURES+1];

	if (encnode == TNULL) {
		printf("NULL");
		return;
	}

	if (isleaf(encnode)) {
		node=encleaf2mindex(encnode);
		printf("{%X: %dx%.2f",node,leafvalue(node),leafcount(node));
		if (leafnexttree(node) != TNULL) {
			/* build the indentation for the nested trees before printing them */
			sprintf(deeper,"    %s",ind);
			printf(" ->{{\n");
			printtree(leafnexttree(node),deeper);
			printf("%s}}",ind);
		}
		printf("}");
		return;
	}

	/* interior node: header, then left and right children separated by a space */
	node= encnode;
	printf("[%X: <=%d (%.2f) W=%d ",node,intsortpt(node),intsum(node),intwait(node));
	printtree2(intleft(node),ind);
	printf(" ");
	printtree2(intright(node),ind);
	printf("]");
}

/* print to stdout a one-line display of just this tree; trees hanging off its leaves are not followed */
void printtree_shallow(mindex tree) {
	printf("Tree %X of %s: ",tree,featurename[treetype(tree)]);

	printtree2_shallow(treeroot(tree));

	printf("\n");
}

/* print to stdout the (encoded) node and the interior nodes and leaves below it, without following the trees that hang off leaves */
void printtree2_shallow(dmindex encnode) {
	mindex node;

	if (encnode == TNULL) {
		printf("NULL");
		return;
	}

	if (isleaf(encnode)) {
		node=encleaf2mindex(encnode);
		printf("{%X: %dx%.2f}",node,leafvalue(node),leafcount(node));
		return;
	}

	/* interior node: header, then left and right children separated by a space */
	node= encnode;
	printf("[%X: <=%d (%.2f) ",node,intsortpt(node),intsum(node));
	printtree2_shallow(intleft(node));
	printf(" ");
	printtree2_shallow(intright(node));
	printf("]");
}

/*****************************************************/

/* run the integrity check on every tree present in T[] and return the total number of problems found */
int sanity_check_trees() {
	int total= 0;
	int idx= 0;

	while (idx < NUM_FEATURES) {
		if (T[idx] != TNULL) {
			total+= sanity_check_tree(T[idx]);
		}
		idx++;
	}
	return total;
}

/* check one tree: its feature type must lie in [0,NUM_FEATURES) and, if it has a root, the nodes below must pass sanity_check_subtree; failures are reported on stderr and the number found is returned */
int sanity_check_tree(mindex tree) {
	int errs= 0;
	dmindex top= treeroot(tree);

	if (!(treetype(tree) >= 0 && treetype(tree) < NUM_FEATURES)) {
		fprintf(stderr,"*** integrity check failure: type of %X is not valid (%d)\n",tree,treetype(tree));
		errs++;
	}
	if (top != TNULL) {
		errs+= sanity_check_subtree(top);
	}
	return errs;
}

/* check the (encoded) node and everything below it, reporting each problem on stderr and returning how many were found.
   Leaves must have a positive count, and every tree hanging off them is checked too.
   Interior nodes must have both children, a sum within 0.1% of the children's combined count/sum, and a sort point equal to the largest value on the left. */
int sanity_check_subtree(dmindex encnode) {
	int errs= 0;
	int haveboth;
	mindex node,sub;
	dmindex lchild,rchild;

	if (isleaf(encnode)) {
		double cnt;

		node= encleaf2mindex(encnode);
		cnt= leafcount(node);
		if (cnt <= 0.0) {
			fprintf(stderr,"*** integrity check failure: count on leaf %X is negative or 0 (%f)\n",node,cnt);
			errs++;
		}
		/* can check if our count is approx that of the root's child */
		sub= leafnexttree(node);
		while (sub != TNULL) {
			errs+= sanity_check_tree(sub);
			sub= treenext(sub);
		}
		return errs;
	}

	node= encnode;
	lchild= intleft(node);
	rchild= intright(node);
	haveboth= (lchild != TNULL) && (rchild != TNULL);

	/* the sum can only be compared when both children are there to be added up */
	if (haveboth) {
		double total= intsum(node);
		double lct= count_or_sum(lchild);
		double rct= count_or_sum(rchild);
		double ratio= (lct+rct)/total;
		if (ratio < 0.999 || ratio > 1.001) {
			fprintf(stderr,"*** integrity check failure: sum on interior node %X (%f) does not match sum/counts on leaves (%f+%f)\n",node,total,lct,rct);
			errs++;
		}
	}

	if (lchild != TNULL) {
		errs+= sanity_check_subtree(lchild);
	} else {
		fprintf(stderr,"*** integrity check failure: left of interior node %X is TNULL\n",node);
		errs++;
	}

	if (rchild != TNULL) {
		errs+= sanity_check_subtree(rchild);
	} else {
		fprintf(stderr,"*** integrity check failure: right of interior node %X is TNULL\n",node);
		errs++;
	}

	if (haveboth) {
		if (largestval(lchild) != intsortpt(node)) {
			fprintf(stderr,"*** integrity check failure: sortpoint on interior node %X (%d) does not match largest value on left (%d)\n",node,intsortpt(node),largestval(lchild));
			errs++;
		}
	}
	return errs;
}


/* $Id: spp_anomsensor.c,v 1.5 2001/01/25 19:16:16 fygrave Exp $ */
/*********************************************************************
anommem.c, distributed as part of Spade v092200.1
Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
copyright (c) 2000 by Silicon Defense (http://www.silicondefense.com/)
Released under GNU General Public License, see the COPYING file included
with the distribution or http://www.silicondefense.com/spice/ for details.

anommem.c contains all the routines for Spade tree memory management.

Please send complaints, kudos, and especially improvements and bugfixes to
hoagland@SiliconDefense.com.  As described in GNU General Public License, no
warranty is expressed for this program.
*********************************************************************/


/* initialize the memory manager */
void init_mem() {
	ROOT_BLOCK_BITS= DEFAULT_ROOT_BLOCK_BITS;
	INT_BLOCK_BITS= DEFAULT_INT_BLOCK_BITS;
	LEAF_BLOCK_BITS= DEFAULT_LEAF_BLOCK_BITS;
	MAX_ROOT_BLOCKS= DEFAULT_MAX_ROOT_BLOCKS;
	MAX_INT_BLOCKS= DEFAULT_MAX_INT_BLOCKS;
	MAX_LEAF_BLOCKS= DEFAULT_MAX_LEAF_BLOCKS;

	TNULL= -1; /* all 1's */
	DMINDEXMASK= 1 << (sizeof(dmindex)*8-1);

	root_freelist=TNULL;
	int_freelist=TNULL;
	leaf_freelist=TNULL;

	allocate_mem_blocks();
}

/* allocate the three tables of block pointers (MAX_ROOT_BLOCKS, MAX_INT_BLOCKS and MAX_LEAF_BLOCKS entries) and mark every entry as not yet allocated; exits the program if a table cannot be allocated */
void allocate_mem_blocks() {
	unsigned int i;

	ROOT_M=(treeroot **)malloc(sizeof(treeroot *)*MAX_ROOT_BLOCKS);
	INT_M=  (intnode **)malloc(sizeof(intnode *)*MAX_INT_BLOCKS);
	LEAF_M=(leafnode **)malloc(sizeof(leafnode *)*MAX_LEAF_BLOCKS);
	/* a NULL result only counts as failure when entries were requested, since malloc(0) may return NULL */
	if ((ROOT_M == NULL && MAX_ROOT_BLOCKS > 0) ||
	    (INT_M == NULL && MAX_INT_BLOCKS > 0) ||
	    (LEAF_M == NULL && MAX_LEAF_BLOCKS > 0)) {
		fprintf(stderr,"Out of memory! in allocation of block pointer tables; exiting\n");
		exit(2);
	}

	for (i=0; i < MAX_ROOT_BLOCKS; i++) ROOT_M[i]= NULL;
	for (i=0; i < MAX_INT_BLOCKS; i++) INT_M[i]= NULL;
	for (i=0; i < MAX_LEAF_BLOCKS; i++) LEAF_M[i]= NULL;
}

/* allocate a new treeroot node with the given feature type and return it; the node starts with no root and no next tree.
   When the treeroot freelist is empty a fresh block of ROOT_BLOCK_SIZE slots is allocated first; the program exits
   if all MAX_ROOT_BLOCKS blocks are already in use or the block cannot be allocated. */
mindex new_treeinfo(features type) {
	mindex fresh;
	int slot,blk;

	if (root_freelist == TNULL) { /* nothing free, so a new block is needed */
		/* locate the first block pointer that is still unused */
		blk= 0;
		while (blk < (int)MAX_ROOT_BLOCKS && (ROOT_M[blk] != NULL)) {
			blk++;
		}
		if (blk == (int)MAX_ROOT_BLOCKS) {
			fprintf(stderr,"exhausted all %d blocks of %d treeroots; exiting; you might want to increase DEFAULT_MAX_ROOT_BLOCKS or DEFAULT_ROOT_BLOCK_SIZE in params.h\n",MAX_ROOT_BLOCKS,ROOT_BLOCK_SIZE);
			printf("next free root: %X; int: %X, leaf: %X\n",root_freelist,int_freelist,leaf_freelist);
			exit(1);
		}

		ROOT_M[blk]= (treeroot *)calloc(ROOT_BLOCK_SIZE,sizeof(treeroot));
		if (ROOT_M[blk] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of new treeroot block; exiting");
			exit(2);
		}

		/* chain the new slots together: each points at its successor, the last one ends the list */
		root_freelist= root_index(blk,0);
		for (slot=0; slot < (ROOT_BLOCK_SIZE-1); slot++) {
#ifdef EXTRA_MARK_FREE
			ROOT_M[blk][slot].root= TNULL;
#endif
			rfreenext(ROOT_M[blk][slot])= root_index(blk,slot+1);
		}
#ifdef EXTRA_MARK_FREE
		ROOT_M[blk][ROOT_BLOCK_SIZE-1].root= TNULL;
#endif
		rfreenext(ROOT_M[blk][ROOT_BLOCK_SIZE-1])= TNULL;
	}

	/* pop the head of the freelist and initialize it for the caller */
	fresh= root_freelist;
	root_freelist= rfreenext(tree(root_freelist));
	treetype(fresh)= type;
	treeroot(fresh)= TNULL;
	treenext(fresh)= TNULL;
	return fresh;
}

/* free the treeroot node given by returning its slot to the treeroot freelist; no memory is handed back to the system */
void free_treeinfo(mindex f) {
#ifdef EXTRA_MARK_FREE
	/* with EXTRA_MARK_FREE defined, a freed slot is additionally flagged by clearing its root */
	treeroot(f)= TNULL;
#endif
	/* add it to the start of the list: the freed slot points at the old head, then becomes the head */
	rfreenext(tree(f))= root_freelist;
	root_freelist= f;
}


/* allocate a new intnode node and return it; the node starts with no children, a sum of 0, NOT_A_SORTPT as sort point and a wait of 999.
   When the intnode freelist is empty a fresh block of INT_BLOCK_SIZE slots is allocated first; the program exits
   if all MAX_INT_BLOCKS blocks are already in use or the block cannot be allocated. */
mindex new_int() {
	mindex fresh;
	int slot,blk;

	if (int_freelist == TNULL) { /* nothing free, so a new block is needed */
		/* locate the first block pointer that is still unused */
		blk= 0;
		while (blk < (int)MAX_INT_BLOCKS && (INT_M[blk] != NULL)) {
			blk++;
		}
		if (blk == (int)MAX_INT_BLOCKS) {
			fprintf(stderr,"exhausted all %d blocks of %d intnodes; exiting; you might want to increase DEFAULT_MAX_INT_BLOCKS or DEFAULT_INT_BLOCK_SIZE in params.h\n",MAX_INT_BLOCKS,INT_BLOCK_SIZE);
			printf("next free root: %X; int: %X, leaf: %X\n",root_freelist,int_freelist,leaf_freelist);
			exit(1);
		}

		INT_M[blk]= (intnode *)calloc(INT_BLOCK_SIZE,sizeof(intnode));
		if (INT_M[blk] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of new intnode block; exiting");
			exit(2);
		}

		/* chain the new slots together: each points at its successor, the last one ends the list */
		int_freelist= intnode_index(blk,0);
		for (slot=0; slot < (INT_BLOCK_SIZE-1); slot++) {
#ifdef EXTRA_MARK_FREE
			INT_M[blk][slot].sum= -1;
#endif
			ifreenext(INT_M[blk][slot])= intnode_index(blk,slot+1);
		}
#ifdef EXTRA_MARK_FREE
		INT_M[blk][INT_BLOCK_SIZE-1].sum= -1;
#endif
		ifreenext(INT_M[blk][INT_BLOCK_SIZE-1])= TNULL;
	}

	/* pop the head of the freelist and initialize it for the caller */
	fresh= int_freelist;
	int_freelist= ifreenext(intnode(int_freelist));
	intleft(fresh)= intright(fresh)= TNULL;
	intsum(fresh)=0;
	intsortpt(fresh)= NOT_A_SORTPT;
	intwait(fresh)= 999;
	return fresh;
}

/* free the intnode node given by returning its slot to the intnode freelist; no memory is handed back to the system */
void free_int(mindex f) {
#ifdef EXTRA_MARK_FREE
	/* with EXTRA_MARK_FREE defined, a freed slot is additionally flagged with a sum of -1 */
	intsum(f)= -1;
#endif
	/* add it to the start of the list: the freed slot points at the old head, then becomes the head */
	ifreenext(intnode(f))= int_freelist;
	int_freelist= f;
}

/* allocate a new leafnode node holding the given value and return it; the node starts with a count of 1 and no next tree.
   When the leafnode freelist is empty a fresh block of LEAF_BLOCK_SIZE slots is allocated first; the program exits
   if all MAX_LEAF_BLOCKS blocks are already in use or the block cannot be allocated. */
mindex new_leaf(valtype val) {
	mindex res;
	int i,p;
	if (leaf_freelist == TNULL) { /* need to allocate a new block */
		/* find first unused block */
		for (p=0; p < (int)MAX_LEAF_BLOCKS && (LEAF_M[p] != NULL); p++) {}
		if (p == (int)MAX_LEAF_BLOCKS) {
			/* the limit that applies here is DEFAULT_MAX_LEAF_BLOCKS, matching the root and int messages */
			fprintf(stderr,"exhausted all %d blocks of %d leafnodes; exiting; you might want to increase DEFAULT_MAX_LEAF_BLOCKS or DEFAULT_LEAF_BLOCK_SIZE in params.h\n",MAX_LEAF_BLOCKS,LEAF_BLOCK_SIZE);
			printf("next free root: %X; int: %X, leaf: %X\n",root_freelist,int_freelist,leaf_freelist);
			exit(1);
		}
		LEAF_M[p]= (leafnode *)calloc(LEAF_BLOCK_SIZE,sizeof(leafnode));
		if (LEAF_M[p] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of new leafnode block; exiting");
			exit(2);
		}
		/* add new slots to freelist: each points at its successor, the last one ends the list */
		leaf_freelist= leafnode_index(p,0);
		for (i=0; i < (LEAF_BLOCK_SIZE-1); i++) {
#ifdef EXTRA_MARK_FREE
			LEAF_M[p][i].count= -1;
#endif
			lfreenext(LEAF_M[p][i])= leafnode_index(p,i+1);
		}
#ifdef EXTRA_MARK_FREE
		LEAF_M[p][LEAF_BLOCK_SIZE-1].count= -1;
#endif
		lfreenext(LEAF_M[p][LEAF_BLOCK_SIZE-1])= TNULL;
	}
	/* give out the head and make its next the new head */
	res= leaf_freelist;
	leaf_freelist= lfreenext(leafnode(leaf_freelist));
	leafvalue(res)= val;
	leafcount(res)= 1;
	leafnexttree(res)= TNULL;
	return res;
}

/* free the leafnode node given by returning its slot to the leafnode freelist; no memory is handed back to the system */
void free_leaf(mindex f) {
#ifdef EXTRA_MARK_FREE
	/* with EXTRA_MARK_FREE defined, a freed slot is additionally flagged with a count of -1 */
	leafcount(f)= -1;
#endif
	/* add it to the start of the list: the freed slot points at the old head, then becomes the head */
	lfreenext(leafnode(f))= leaf_freelist;
	leaf_freelist= f;
}

/* $Id: spp_anomsensor.c,v 1.5 2001/01/25 19:16:16 fygrave Exp $ */
/*********************************************************************
store.c, distributed as part of Spade v092200.1
Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
copyright (c) 2000 by Silicon Defense (http://www.silicondefense.com/)
Released under GNU General Public License, see the COPYING file included
with the distribution or http://www.silicondefense.com/spice/ for details.

store.c contains all the checkpoint and recovery functionality in Spade.

Please send complaints, kudos, and especially improvements and bugfixes to
hoagland@SiliconDefense.com.  As described in GNU General Public License, no
warranty is expressed for this program.
*********************************************************************/

#include <errno.h>

/* write the complete memory-manager and tree state to the named file in binary form.
   Layout: 'v' marker, format version, sizes of u16/u32, MAX_U16/MAX_U32, the block
   parameters, TNULL and DMINDEXMASK, the T[] array, the three freelist heads, the number
   of blocks in use of each kind, and then the raw contents of those blocks.
   Returns 1 on success; returns 0 (after reporting on stderr) if the file cannot be
   opened or a write/close error is detected. */
int checkpoint(char *filename) {
	unsigned char uc;
	char v='v';
	unsigned char fvers= CUR_FVERS;
	u16 v16;
	u32 v32;
	unsigned int i,rb_used,ib_used,lb_used;
	FILE *f;

	/* fopen reports failure through its return value; errno alone is not a reliable test */
	f= fopen(filename,"wb");
	if (f == NULL) {
		perror(filename);
		return 0;
	}
	
	/* header: lets recover() verify that the type sizes and limits match */
	fwrite(&v,sizeof(v),1,f);
	fwrite(&fvers,sizeof(fvers),1,f);
	uc= sizeof(u16);
	fwrite(&uc,sizeof(uc),1,f);
	uc= sizeof(u32);
	fwrite(&uc,sizeof(uc),1,f);
	v16= MAX_U16;
	fwrite(&v16,sizeof(v16),1,f);
	v32= MAX_U32;
	fwrite(&v32,sizeof(v32),1,f);
	
	fwrite(&ROOT_BLOCK_BITS,sizeof(ROOT_BLOCK_BITS),1,f);
	fwrite(&INT_BLOCK_BITS,sizeof(INT_BLOCK_BITS),1,f);
	fwrite(&LEAF_BLOCK_BITS,sizeof(LEAF_BLOCK_BITS),1,f);
	fwrite(&MAX_ROOT_BLOCKS,sizeof(MAX_ROOT_BLOCKS),1,f);
	fwrite(&MAX_INT_BLOCKS,sizeof(MAX_INT_BLOCKS),1,f);
	fwrite(&MAX_LEAF_BLOCKS,sizeof(MAX_LEAF_BLOCKS),1,f);

	fwrite(&TNULL,sizeof(TNULL),1,f);
	fwrite(&DMINDEXMASK,sizeof(DMINDEXMASK),1,f);
	
	fwrite(T,sizeof(mindex),NUM_FEATURES,f);
	
	fwrite(&root_freelist,sizeof(root_freelist),1,f);
	fwrite(&int_freelist,sizeof(int_freelist),1,f);
	fwrite(&leaf_freelist,sizeof(leaf_freelist),1,f);
	
	/* count the leading allocated blocks; the scan is bounded by the table size because
	   there is no NULL entry to stop at once every block is in use */
	for (rb_used= 0; rb_used < MAX_ROOT_BLOCKS && ROOT_M[rb_used] != NULL; rb_used++) {}
	fwrite(&rb_used,sizeof(rb_used),1,f);
	for (ib_used= 0; ib_used < MAX_INT_BLOCKS && INT_M[ib_used] != NULL; ib_used++) {}
	fwrite(&ib_used,sizeof(ib_used),1,f);
	for (lb_used= 0; lb_used < MAX_LEAF_BLOCKS && LEAF_M[lb_used] != NULL; lb_used++) {}
	fwrite(&lb_used,sizeof(lb_used),1,f);
	
	/* dump the raw contents of every block in use */
	for (i= 0; i < rb_used; i++) {
		fwrite(ROOT_M[i],sizeof(treeroot),ROOT_BLOCK_SIZE,f);
	}
	for (i= 0; i < ib_used; i++) {
		fwrite(INT_M[i],sizeof(intnode),INT_BLOCK_SIZE,f);
	}
	for (i= 0; i < lb_used; i++) {
		fwrite(LEAF_M[i],sizeof(leafnode),LEAF_BLOCK_SIZE,f);
	}

	/* a short write sets the stream's error indicator; do not report success in that case */
	if (ferror(f)) {
		fprintf(stderr,"error writing checkpoint file %s\n",filename);
		fclose(f);
		return 0;
	}
	/* fclose flushes the remaining buffered data, so its result matters too */
	if (fclose(f) != 0) {
		perror(filename);
		return 0;
	}
	return 1;
}

/* restore the memory-manager and tree state from a file written by checkpoint().
   Returns 1 on success.  Returns 0 if the file cannot be opened or is empty, if its
   format version is newer than CUR_FVERS, or if the recorded u16/u32 sizes or maximum
   values differ from the current ones (those mismatches are reported on stderr).
   The block tables are sized to the larger of the compiled-in default and the number
   of blocks stored in the file.  Exits the program if a block cannot be allocated. */
int recover(char *filename) {
	unsigned char uc,fvers;
	char v;
	u16 v16;
	u32 v32;
	unsigned int i,rb_used,ib_used,lb_used,FILE_MAX_ROOT_BLOCKS,FILE_MAX_INT_BLOCKS,FILE_MAX_LEAF_BLOCKS;
	FILE *f;

	/* fopen reports failure through its return value; errno alone is not a reliable test */
	f= fopen(filename,"rb");
	if (f == NULL) { /* file prob does not exist */
		return 0;
	}
	
	if (fread(&v,sizeof(v),1,f) != 1) { /* empty file: nothing to recover */
		fclose(f);
		return 0;
	}
	if (v == 'v') { /* format version # encoded */
		fread(&fvers,sizeof(fvers),1,f);
		fread(&uc,sizeof(uc),1,f);
	} else { /* format version #0; only diff from version 1 is that the version # is listed */
		fvers= 0;
		uc= (unsigned char)v;
	}
	if (fvers > CUR_FVERS) {
		fprintf(stderr,"This version of the state recover procedure cannot read file %s since has format version %d; this routine can only handle up to version %d\n",filename,fvers,CUR_FVERS);
		fclose(f);
		return 0;
	}
	
	/* in each message below the value read from the file is printed first, the current one second */
	if (sizeof(u16) != uc) {
		fprintf(stderr,"u16 type size from recovery file (%s) (%d bytes) does not match current size (%d bytes)\n",filename,(int)uc,(int)sizeof(u16));
		fclose(f);
		return 0;
	}
	fread(&uc,sizeof(uc),1,f);
	if (sizeof(u32) != uc) {
		fprintf(stderr,"u32 type size from recovery file (%s) (%d bytes) does not match current size (%d bytes)\n",filename,(int)uc,(int)sizeof(u32));
		fclose(f);
		return 0;
	}
	fread(&v16,sizeof(v16),1,f);
	if (v16 != MAX_U16) {
		fprintf(stderr,"strange: MAX_U16 from recovery file (%s) (%u) does not match current value (%u)\n",filename,(unsigned int)v16,(unsigned int)MAX_U16);
		fclose(f);
		return 0;
	}
	fread(&v32,sizeof(v32),1,f);
	if (v32 != MAX_U32) {
		fprintf(stderr,"strange: MAX_U32 from recovery file (%s) (%lu) does not match current value (%lu)\n",filename,(unsigned long)v32,(unsigned long)MAX_U32);
		fclose(f);
		return 0;
	}
	
	fread(&ROOT_BLOCK_BITS,sizeof(ROOT_BLOCK_BITS),1,f);
	fread(&INT_BLOCK_BITS,sizeof(INT_BLOCK_BITS),1,f);
	fread(&LEAF_BLOCK_BITS,sizeof(LEAF_BLOCK_BITS),1,f);
	/* the stored maximums are read only to advance past them; the limits are recomputed below */
	fread(&FILE_MAX_ROOT_BLOCKS,sizeof(MAX_ROOT_BLOCKS),1,f);
	fread(&FILE_MAX_INT_BLOCKS,sizeof(MAX_INT_BLOCKS),1,f);
	fread(&FILE_MAX_LEAF_BLOCKS,sizeof(MAX_LEAF_BLOCKS),1,f);

	fread(&TNULL,sizeof(TNULL),1,f);
	fread(&DMINDEXMASK,sizeof(DMINDEXMASK),1,f);

	fread(T,sizeof(mindex),NUM_FEATURES,f);
	
	fread(&root_freelist,sizeof(root_freelist),1,f);
	fread(&int_freelist,sizeof(int_freelist),1,f);
	fread(&leaf_freelist,sizeof(leaf_freelist),1,f);

	fread(&rb_used,sizeof(rb_used),1,f);
	/* use the size for this run unless there is more stored in the file */
	MAX_ROOT_BLOCKS= rb_used > DEFAULT_MAX_ROOT_BLOCKS ? rb_used : DEFAULT_MAX_ROOT_BLOCKS;
	fread(&ib_used,sizeof(ib_used),1,f);
	MAX_INT_BLOCKS= ib_used > DEFAULT_MAX_INT_BLOCKS ? ib_used : DEFAULT_MAX_INT_BLOCKS;
	fread(&lb_used,sizeof(lb_used),1,f);
	/* compare the leaf count (not the root count) so the leaf table always holds lb_used entries */
	MAX_LEAF_BLOCKS= lb_used > DEFAULT_MAX_LEAF_BLOCKS ? lb_used : DEFAULT_MAX_LEAF_BLOCKS;

	allocate_mem_blocks();
	
	/* re-create every stored block and fill it with the raw contents from the file */
	for (i= 0; i < rb_used; i++) {
		ROOT_M[i]= (treeroot *)malloc(sizeof(treeroot)*ROOT_BLOCK_SIZE);
		if (ROOT_M[i] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of recovered treeroot block; exiting\n");
			exit(2);
		}
		fread(ROOT_M[i],sizeof(treeroot),ROOT_BLOCK_SIZE,f);
	}
	for (i= 0; i < ib_used; i++) {
		INT_M[i]= (intnode *)malloc(sizeof(intnode)*INT_BLOCK_SIZE);
		if (INT_M[i] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of recovered intnode block; exiting\n");
			exit(2);
		}
		fread(INT_M[i],sizeof(intnode),INT_BLOCK_SIZE,f);
	}
	for (i= 0; i < lb_used; i++) {
		LEAF_M[i]= (leafnode *)malloc(sizeof(leafnode)*LEAF_BLOCK_SIZE);
		if (LEAF_M[i] == NULL) {
			fprintf(stderr,"Out of memory! in allocation of recovered leafnode block; exiting\n");
			exit(2);
		}
		fread(LEAF_M[i],sizeof(leafnode),LEAF_BLOCK_SIZE,f);
	}
	fclose(f);
	return 1;
}

/* $Id: spp_anomsensor.c,v 1.5 2001/01/25 19:16:16 fygrave Exp $ */
