#!/bin/sh
#
# Score-generation driver for a single scoreset.
#
#   no argument          split the ORIG logs 90/10, build the perceptron and
#                        run it over a range of thresholds
#   non-empty argument   report validation results and statistics for the
#                        scores currently in ../rules
#
# Run from the directory that contains "config", "ORIG/" and the helper
# programs; every path below is relative to it.

# "config" is expected to set SCORESET.  It is sourced as "./config" because
# a slash-less operand to "." is looked up via PATH, and POSIX shells (dash,
# bash invoked as sh) do not fall back to the current directory.
. ./config

# Without a scoreset every file name derived from NAME would be wrong, so
# stop here with a clear message instead of failing later.
: "${SCORESET:?SCORESET is not set (expected from ./config)}"

NAME="set$SCORESET"

# beware! This discards uncommitted local changes to the scores file, and it
# happens in both modes.
svn revert ../rules/50_scores.cf

# Both modes need the original ham and spam logs for this scoreset.
if [ ! -f "ORIG/ham-$NAME.log" ] || [ ! -f "ORIG/spam-$NAME.log" ]; then
	echo "Couldn't find logs for $NAME" >&2
	exit 1
fi

# split_log DIR KIND
#
# Splits ORIG/KIND-$NAME.log into ten buckets inside DIR, concatenates
# buckets 1-9 into DIR/KIND.log and keeps bucket 10 as DIR/KIND-validate.log.
# Runs in a subshell so the caller's working directory is untouched, and
# returns non-zero if the directory cannot be entered or the expected bucket
# files are missing.
split_log() {
	(
		cd "$1" || exit 1
		../tenpass/split-log-into-buckets 10 < "../ORIG/$2-$NAME.log" > /dev/null
		cat split-[1-9].log > "$2.log" || exit 1
		rm -f split-[1-9].log
		mv split-10.log "$2-validate.log"
	)
}

# Overall exit status; set to 1 when any threshold run could not be completed.
rc=0

if [ -z "$1" ]; then
	echo "[Doing a scoreset $SCORESET score-generation run]"

	# Clean out old runs
	echo "[Cleaning up]"
	rm -rf spam-validate.log ham-validate.log spam.log ham.log \
		NSBASE SPBASE tmp make.output freqs perceptron.scores \
		"gen-$NAME.out" "gen-$NAME.scores" "gen-$NAME.validate"
	make clean >/dev/null

	# Generate 90/10 split logs
	mkdir NSBASE SPBASE || exit 1

	echo "[Generating 90/10 split ham]"
	if ! split_log NSBASE ham; then
		echo "Failed to split ham log for $NAME" >&2
		exit 1
	fi

	echo "[Generating 90/10 split spam]"
	if ! split_log SPBASE spam; then
		echo "Failed to split spam log for $NAME" >&2
		exit 1
	fi

	echo "[Setting up for gen run]"
	# Ok, setup for a run
	ln -s SPBASE/spam.log .
	ln -s NSBASE/ham.log .
	ln -s SPBASE/spam-validate.log .
	ln -s NSBASE/ham-validate.log .

	# Try to find the number of processors.  Each probe assigns on its own so
	# that "grep -c" printing 0 (and exiting 1) cannot leave a two-line value
	# behind; anything that is not a positive integer falls back to 1.
	numcpus=$(cpucount 2>/dev/null) ||
		numcpus=$(grep -E -c '^processor([^[:alnum:]_]|$)' /proc/cpuinfo 2>/dev/null) ||
		numcpus=1
	case "$numcpus" in
		'' | 0 | *[!0-9]*) numcpus=1 ;;
	esac

	echo "[Generating perceptron]"
	# Generate perceptron with full logs
	if ! make -j "$numcpus" "SCORESET=$SCORESET" > make.output 2>&1; then
		echo "make failed; see make.output" >&2
		exit 1
	fi

	# Private scratch directory for the rewritten scores file.  The file is
	# created by redirection inside it, so it gets the usual umask-based
	# permissions before being moved into ../rules.
	tmpdir=$(mktemp -d) || {
		echo "Couldn't create a temporary directory" >&2
		exit 1
	}
	trap 'rm -rf "$tmpdir"' EXIT
	trap 'exit 1' HUP INT TERM

	for threshold in 5.0 4.9 4.8 4.7 4.6 4.5 4.4 4.3 4.2 4.1 4.0; do
		out="gen-$NAME-$threshold"

		# A scores file left over from an earlier run must not be mistaken
		# for the result of this one.
		rm -f "$out.scores"

		(
		echo "[gen run start]"
		pwd
		date
		./perceptron -t "$threshold" -p 2.0 -e 100
		mv perceptron.scores "$out.scores"
		echo "[gen run end]"
		pwd
		date
		) | tee "$out.out"

		# The pipeline's status is tee's, so detect failure by the missing file.
		if [ ! -f "$out.scores" ]; then
			echo "No scores produced for threshold $threshold; skipping" >&2
			rc=1
			continue
		fi

		svn revert ../rules/50_scores.cf

		# Only replace the scores file when the rewrite actually succeeded;
		# otherwise it stays in its freshly reverted state.
		if ./rewrite-cf-with-new-scores "$SCORESET" ../rules/50_scores.cf "$out.scores" > "$tmpdir/50_scores.cf"; then
			mv "$tmpdir/50_scores.cf" ../rules/50_scores.cf
		else
			echo "rewrite-cf-with-new-scores failed for threshold $threshold; skipping" >&2
			rc=1
			continue
		fi

		./fp-fn-statistics --ham ham-validate.log --spam spam-validate.log --scoreset "$SCORESET" > "$out.statistics"
	done

else

	# This needs to have 50_scores.cf in place first ...
	echo "[gen validation results]"
	./logs-to-c --spam=SPBASE/spam-validate.log \
		--ham=NSBASE/ham-validate.log \
		--count --cffile=../rules "--scoreset=$SCORESET" | tee "gen-$NAME.validate"

	echo "[STATISTICS file generation]"
	./mk-baseline-results "$SCORESET" | tee "gen-$NAME.statistics"
fi

exit "$rc"
