#!/bin/csh -f

# This is an example shell script that can be modified to easily run 
# a program numerous times with parameter variations.
#
# Place this file in the directory with your data and give it a
# chmod 700 to set executable permission.


# Number of examples (lines) to hold out as the test set for each data set.
set testsetsize=30


# Replace "dataset1 dataset2 dataset3" with the base names of your
# data sets, e.g. "vote", "monks", and your third set.  The variable
# U below will represent a file name.
foreach U (dataset1 dataset2 dataset3)

  # Names of the training and test files derived from the data set name.
  set D=$U-train
  set T=$U-test

  # Backquotes run a command and substitute its output; very handy.
  # The file is fed to wc on stdin so that only the line count is printed;
  # with a file operand wc also prints the file name, which would end up
  # in the variable as a second word.
  set mainsetsize=`wc -l < $U`

  # Split the data file into training and test files (D and T).  Writes
  # $testsetsize randomly-selected examples into $T and the rest into $D.
  #
  # Assumes one example per line.
  #
  # You should only run this once; otherwise your results will be inconsistent
  # across multiple runs.  The output files are opened in append mode, so
  # running this again also duplicates examples in $D and $T; delete both
  # files first if you need a fresh split.
  #
  # Caveat: this does not actually check if you choose the same line twice.
  # If it does happen (unlikely), the test set ends up smaller than requested.
  #
  # The whole awk program must be a single argument: anything after the
  # first program string is taken by awk as an input file name.
  awk -v testsize=$testsetsize -v mainsetsize=$mainsetsize \
   -v dfile=$D -v tfile=$T \
   'BEGIN {srand(); for (i=0;i<testsize;i++) a[1+int(rand()*mainsetsize)]=i} {if (NR in a) print $0 >> tfile; else print $0 >> dfile}' < $U

  # Line counts of the two files just written.
  set trainsize=`wc -l < $D`
  set testsize=`wc -l < $T`

  # Compute the training set sizes: start at 10 and step up to $trainsize in
  # four intervals, always ending with the full training set.
  # (May be a little off, producing an extra number if $trainsize-10 is not
  # evenly divisible by 4.)  The step is forced to be at least 1 so that the
  # loop terminates when the training set has fewer than 14 examples.
  set sizes=`echo 10 $trainsize | awk '{interv=int(($2-$1)/4); if (interv<1) interv=1; for (i=$1;i<$2;i+=interv) print i; print $2}'`

  # Run every training set size that was computed above.
  set i=1
  while ($i <= $#sizes)
    # run three iterations
    set j=1
    while ($j <= 3)

      # Here we assume that your code will subsample the training set for
      # us.  Alternatively one can adapt the random selection code from
      # above.

      ./runmyprogramrightnow -trainset $D -testset $T   \
        -sizeofsubsample $sizes[$i]  \
        -treeoutputfile treefile.$U.$i.$j -plotoutputfile plotfile.$U.$i.$j

      @ j++
    end
    @ i++
  end

end
