#!/bin/sh

#
# This is a small script used to interact with run-masses to do a full
# corpus mass-check run, including the rsync to the SA server.
# Change the appropriate variables below.
#
# By default, it'll do a set0 run, but you can change that by adding
# --net or --bayes to the commandline.
#
# --net by itself will automatically try running 6 mass-checks in parallel
#

# ---- Site-specific settings: adjust these for your installation ----

# Checked-out SpamAssassin tree; it is CVS-updated before each run and
# handed to run-masses as its first argument.
SA_VER='/home/felicity/SA/spamassassin-corpora'
# Working directory of the run; also holds the "update" and "run-masses"
# helper scripts invoked further down.
CORPUS='/home/felicity/SA/corpus'
# Exported so that the rsync child process started at the end of the
# script can see it in its environment.
RSYNC_PASSWORD='your_rsync_password'
export RSYNC_PASSWORD

# ---- Per-run state (defaults describe a plain set0 run) ----

# Doubles as the rsync user name and as the suffix of the uploaded logs.
FILENAME='your_rsync_username'
# Options passed through to run-masses; extended for --net / --bayes.
OPTS='--progress'
# Switched to 1 when --bayes / --net appear on the command line.
BAYES=0
NET=0

# parse_args ARG...
#
# Scan the given arguments and record which kind of run was requested.
#   --net    sets NET=1
#   --bayes  sets BAYES=1
# Any other argument is ignored.
#
# The loop is driven by the argument count rather than by "$1" being
# non-empty, so an empty string in the argument list no longer ends
# parsing early and hides the flags that follow it.
#
# Globals written: NET, BAYES
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --net)   NET=1 ;;
      --bayes) BAYES=1 ;;
    esac
    shift
  done
}

parse_args "$@"

# Translate the NET / BAYES flags into run-masses options and into a
# prefix for the log name.  With both flags set the resulting name is
# "bayes-net-<name>", because the net prefix is applied first.
case "$NET" in
  1)
    FILENAME="net-$FILENAME"
    OPTS="$OPTS --net"

    # A net run gets 6 parallel jobs, except when Bayes is part of the
    # same run.
    case "$BAYES" in
      0) OPTS="$OPTS -j 6" ;;
    esac
    ;;
esac

case "$BAYES" in
  1)
    FILENAME="bayes-$FILENAME"
    OPTS="$OPTS --bayes"
    ;;
esac

# Update SA version before our run.
# "cvs -q up" is tried up to 6 times with a 60 second pause between
# attempts; if every attempt fails the script exits with status 2.
echo "[Updating $SA_VER]"
# Stop right away if the checkout cannot be entered; otherwise cvs would
# run in whatever directory the script was started from.
cd "$SA_VER" || {
  echo "Couldn't cd to $SA_VER, aborting!" >&2
  exit 2
}
COUNT=0
while ! cvs -q up; do
  COUNT=$((COUNT + 1))
  if [ "$COUNT" -gt 5 ]; then
    echo "Couldn't do a CVS update, aborting!" >&2
    exit 2
  fi
  # Only wait when another attempt is actually going to follow.
  sleep 60
done

# update the corpus with the latest/greatest mail files
echo "[Updating Corpus]"
# Everything after this point refers to ham.log, spam.log and results.log
# by relative name, so carrying on from the wrong directory would check,
# rename and upload the wrong files.  Abort if the cd fails.
cd "$CORPUS" || {
  echo "Couldn't cd to $CORPUS, aborting!" >&2
  exit 2
}
# The exit status of the update helper is not checked; the run goes ahead
# with whatever corpus is present.
"$CORPUS/update" -q

# remove current bayes db set
echo "[Removing old Bayes DB]"
# Only the directory part is quoted, so the trailing bayes* still globs.
# ${SA_VER:?} makes the shell abort instead of expanding to
# /masses/spamassassin/bayes* should SA_VER ever be empty or unset.
rm -f "${SA_VER:?}"/masses/spamassassin/bayes*

# do the run
echo "[Running mass-check '$OPTS' in $CORPUS]"
# OPTS is deliberately left unquoted: it holds several space-separated
# options that must reach run-masses as separate words.
# shellcheck disable=SC2086
"$CORPUS/run-masses" "$SA_VER" $OPTS > /dev/null

# Both logs must exist and be non-empty before anything gets renamed or
# uploaded.  Two separate tests joined with || replace the obsolescent
# "-o" operator of test(1).
if [ ! -s ham.log ] || [ ! -s spam.log ]; then
  echo "There seems to be a problem with either ham.log or spam.log, aborting!" >&2
  exit 1
fi

# Tag each log with the run name: <kind>.log becomes <kind>-$FILENAME.log.
# The names are quoted so a FILENAME containing whitespace or glob
# characters still yields exactly one source and one destination per mv.
for kind in ham spam results; do
  mv -f "$kind.log" "$kind-$FILENAME.log"
done

# The logs carry their final names now; send every *-$FILENAME.log in the
# current directory to the corpus module on the SA rsync server, logging
# in as $FILENAME.
echo "[Uploading daily corpus logs]"
# Only the variable parts are quoted, so the leading * still globs.
rsync -qCPcvuzb *-"$FILENAME".log "$FILENAME@rsync.spamassassin.org::corpus/"

# Finish by printing the results log of this run to stdout.
echo "[Our results]"
cat "results-$FILENAME.log"
