#!/bin/bash
#
# This script copies the arff files to a destination folder, but only the
# "data" files and not the statistics files. Furthermore it divides them
# into MI and RELAGGS files.
#
# $Revision: 1.2 $
# FracPete

# Prints the help text of this script to stdout: the synopsis, a short
# description and all options, with the current values of SRC, DEST, MULTI,
# RELAGGS and DELETE listed as defaults.
function usage()
{
   local script=${0##*/}

   # one argument per output line, the empty arguments produce the blank lines
   printf '%s\n' \
      "" \
      "usage: $script -s <src-dir> -d <dest-dir> -m <dir> -r <dir> -D -c [-h]" \
      "" \
      "copies arff files to a destination directory (only the data files!)" \
      "it also divides them into Multi-Instance and RELAGGS files." \
      "" \
      " -h   this help" \
      " -s   <src-dir>" \
      "      the directory where the ARFF files are right now" \
      "      default: $SRC" \
      " -d   <dest-dir>" \
      "      the directory where the ARFF files are to be stored" \
      "      default: $DEST" \
      " -m   <dir>" \
      "      the sub-directory for the Multi-Instance files" \
      "      default: $MULTI" \
      " -r   <dir>" \
      "      the sub-directory for the RELAGGS files" \
      "      default: $RELAGGS" \
      " -D   whether to delete the destination dir" \
      "      default: $DELETE" \
      " -c   compress the files with gzip" \
      ""
}

# creates the directory TMP (including missing parent directories) if
# necessary
#
# Globals read:
#   TMP  the directory to create; nothing is done if TMP is empty or if the
#        directory exists already
# Returns the exit status of mkdir, or 0 if there was nothing to do.
function create_dir()
{
   # the quotes keep a path with blanks in one piece, "--" protects a path
   # that starts with a dash
   if [ -n "$TMP" ] && [ ! -d "$TMP" ]
   then
      mkdir -p -- "$TMP"
   fi
}

# Copies the ARFF files found directly in SRC to DIR, if their file name fits
# INCLUDE and EXCLUDE and if they occupy more than one block on disk.
#
# Globals read:
#   SRC      the directory that is searched for *.arff files
#   DIR      the (already existing) directory the files are copied to
#   INCLUDE  grep pattern a file name must match; if empty, every file name
#            is accepted
#   EXCLUDE  grep pattern a file name must NOT match; if empty, nothing is
#            copied at all
# Globals written:
#   COPIED   "yes" if files were handed over to cp, "no" otherwise
# Prints "Nothing copied!" if COPIED is "no".
# Returns the exit status of cp, or 0 if cp was not called.
#
# Both patterns are applied to the file name only, so that the name of the
# SRC directory cannot influence the selection. File names that contain
# newlines are not supported.
function copy_files()
{
   local path name blocks status=0
   local -a names=() selected=()

   COPIED="no"

   if [ -n "$EXCLUDE" ]
   then
      # gather the candidates via globbing instead of parsing the output of
      # ls, this keeps names with blanks intact; an unmatched glob stays
      # literal and is dropped by the -f test
      for path in "$SRC"/*.arff
      do
         if [ -f "$path" ]
         then
            names+=("${path##*/}")
         fi
      done

      if [ "${#names[@]}" -gt 0 ]
      then
         while IFS= read -r name
         do
            path=$SRC/$name

            # the first column of "ls -s" is the number of allocated blocks;
            # read strips the padding, so the check does not depend on the
            # column width
            blocks=""
            read -r blocks _ < <(ls -sd -- "$path")
            case $blocks in
               ""|*[!0-9]*) continue ;;
            esac

            # files with 0 or 1 block are considered to contain no data
            if [ "$blocks" -gt 1 ]
            then
               selected+=("$path")
            fi
         done < <(
            printf '%s\n' "${names[@]}" |
               if [ -n "$INCLUDE" ]; then grep -e "$INCLUDE"; else cat; fi |
               grep -v -e "$EXCLUDE"
         )
      fi
   fi

   # cp is only called if there is something to copy, otherwise it would
   # complain about missing operands
   if [ "${#selected[@]}" -gt 0 ]
   then
      COPIED="yes"
      cp -- "${selected[@]}" "$DIR" || status=$?
   fi

   if [ "$COPIED" = "no" ]
   then
      echo "Nothing copied!"
   fi

   return $status
}

# removes empty files in DIR, i.e. ARFF files that have nothing in the @data
# section
#
# A file counts as empty if its last line consists, apart from whitespace,
# only of the @data keyword. ARFF keywords are case-insensitive, hence
# @DATA etc. are recognized as well.
#
# Globals read:
#   DIR  the directory whose *.arff files are checked
# Prints "deleting <file>..." for each file that gets removed.
function remove_empty_files()
{
   # local variables, in order not to clobber the global TMP of create_dir
   local file last

   for file in "$DIR"/*.arff
   do
      # an unmatched glob stays literal, i.e. there are no ARFF files at all
      [ -f "$file" ] || continue

      last=$(tail -n 1 -- "$file")
      # get rid of any whitespace: blanks, tabs and the CR of DOS line ends
      last=${last//[[:space:]]/}

      case $last in
         @[Dd][Aa][Tt][Aa])
            echo "deleting $file..."
            rm -f -- "$file"
            ;;
      esac
   done
}

# variables
# ROOT is the directory part of the path this script was called with. If the
# script was called without any path (e.g. "bash script.sh"), the current
# directory is used - an empty ROOT would silently turn the defaults below
# into /tmp and /tmp/arff.
case $0 in
   */*) ROOT=${0%/*} ;;
   *)   ROOT="." ;;
esac
SRC="$ROOT/../tmp"
DEST="$ROOT/../tmp/arff"
MULTI="Multi-Instance"
RELAGGS="RELAGGS"
DELETE="no"
COMPRESS="no"
# for reference: lists all files with the extension .arff whose size, as
# reported by "ls -s", is neither 0 nor 1 block
#ls -Ss1d *.arff | grep -v " 0 \| 1 " | sed s/^" "*//g | cut -f2 -d" "

# interpret parameters
#
# Parses the given command line options and overrides the globals SRC, DEST,
# MULTI, RELAGGS, DELETE and COMPRESS accordingly.
# -h prints the usage and exits with 0. An unknown option or a missing
# option argument is reported on stderr, followed by the usage and exit 1.
function parse_options()
{
   # private getopts state, parsing always starts with the first argument
   local OPTIND=1 OPTARG flag

   # the leading ':' turns on the silent mode of getopts: it prints no
   # diagnostics itself, but returns ':' (missing argument) or '?' (unknown
   # option) with the offending option character in OPTARG
   while getopts ":hs:d:m:r:Dc" flag
   do
      case $flag in
         s) SRC=$OPTARG
            ;;
         d) DEST=$OPTARG
            ;;
         m) MULTI=$OPTARG
            ;;
         r) RELAGGS=$OPTARG
            ;;
         D) DELETE="yes"
            ;;
         c) COMPRESS="yes"
            ;;
         h) usage
            exit 0
            ;;
         :) echo "${0##*/}: option -$OPTARG requires an argument" >&2
            usage
            exit 1
            ;;
         *) echo "${0##*/}: unknown option -$OPTARG" >&2
            usage
            exit 1
            ;;
      esac
   done
}

parse_options "$@"

# delete content of dir
# NB: without the quotes an empty DEST would pass the -d test and the glob
# below would expand to "/*"; ${DEST:?} additionally aborts the script if
# DEST is empty
if [ "$DELETE" = "yes" ] && [ -d "$DEST" ]
then
   echo "Deleting $DEST..."
   CONTENT=("${DEST:?}"/*)
   # an unmatched glob stays literal, in that case there is nothing to delete
   if [ -e "${CONTENT[0]}" ] || [ -L "${CONTENT[0]}" ]
   then
      rm -R -- "${CONTENT[@]}"
   fi
   unset CONTENT
fi

# create dirs (create_dir works on the directory stored in TMP)
TMP="$DEST/$MULTI"; create_dir
TMP="$DEST/$RELAGGS"; create_dir

# copy Multi-Instance
# (INCLUDE and EXCLUDE are grep patterns, "\|" separates the alternatives)
echo "Copying Multi-Instance..."
INCLUDE='\-mi\|\-remi'; EXCLUDE='\-stat\|\-classified'; DIR="$DEST/$MULTI"; copy_files

# copy RELAGGS, i.e. all data files that are not Multi-Instance files
echo "Copying RELAGGS..."
INCLUDE=""; EXCLUDE='\-mi\|\-remi\|\-stat\|\-classified'; DIR="$DEST/$RELAGGS"; copy_files

# remove empty files
DIR="$DEST/$MULTI"; remove_empty_files
DIR="$DEST/$RELAGGS"; remove_empty_files

# compress files
if [ "$COMPRESS" = "yes" ]
then
   echo "Compressing..."
   # "+" hands many files to a single gzip call instead of one call per file
   find "$DEST/$MULTI" -name "*.arff" -exec gzip -- {} +
   find "$DEST/$RELAGGS" -name "*.arff" -exec gzip -- {} +
fi

