#!/bin/bash
#
# transform the ARFF file into csv files (for import into Proper)

# Print the command-line help to stdout.
#
# Reads the globals ARFF, SPEAKER and UTTERANCE to show the file locations
# currently in effect as the "default" of the respective option.
usage() {
   local script=${0##*/}

   # one argument per output line; empty arguments yield the blank lines
   printf '%s\n' \
      "" \
      "usage: $script -a <file> -s <file> -u <file> [-h]" \
      "" \
      "combines the names file with the data file" \
      "" \
      " -h   this help" \
      " -a   <file>" \
      "      the ARFF file to use as input for the conversion to CSV" \
      "      default: $ARFF" \
      " -s   <file>" \
      "      the CSV file containing the speakers" \
      "      default: $SPEAKER" \
      " -u   <file>" \
      "      the CSV file containing the utterances" \
      "      default: $UTTERANCE" \
      ""
}

# variables
#
# ROOT is the directory this script was started from; all default file
# locations are relative to it. dirname returns "." when $0 contains no
# slash (e.g. "bash script.sh"), so the defaults never end up directly
# below "/" because of an empty ROOT.
ROOT=$(dirname -- "$0")
ARFF="$ROOT/JapaneseVowels.arff"                   # input (option -a)
DATA="$ROOT/JapaneseVowels.data"                   # intermediate: data rows only
ATTS="$ROOT/JapaneseVowels.atts"                   # intermediate: CSV header line
SPEAKER="$ROOT/JapaneseVowels_speaker.csv"         # output (option -s)
UTTERANCE="$ROOT/JapaneseVowels_utterance.csv"     # output (option -u)

# interpret parameters
#
# Parses the command-line options given as arguments.
#   -a <file>  sets ARFF
#   -s <file>  sets SPEAKER
#   -u <file>  sets UTTERANCE
#   -h         prints the usage and exits with status 0
# An unknown option or an option without its argument prints a diagnostic
# followed by the usage on stderr and exits with status 1.
parse_options() {
   # OPTIND is kept local so that parsing always starts at the first argument
   local flag OPTARG OPTIND=1

   # the leading ":" selects silent mode: getopts reports a missing argument
   # as ":" and an unknown option as "?", with the option letter in OPTARG
   while getopts ":ha:s:u:" flag; do
      case "$flag" in
         a) ARFF=$OPTARG
            ;;
         s) SPEAKER=$OPTARG
            ;;
         u) UTTERANCE=$OPTARG
            ;;
         h) usage
            exit 0
            ;;
         :) echo "${0##*/}: option -$OPTARG requires an argument" >&2
            usage >&2
            exit 1
            ;;
         *) echo "${0##*/}: unknown option -$OPTARG" >&2
            usage >&2
            exit 1
            ;;
      esac
   done
}

parse_options "$@"

# extract data from arff file
#
# Writes every line of an ARFF file that is neither a comment ("%..."),
# a declaration ("@...") nor empty to a separate file.
#   $1 - ARFF file to read
#   $2 - file receiving the data rows
extract_data() {
   grep -v -e "^%" -e "^@" -e "^$" -- "$1" > "$2"
}

# extract attributes from file
#
# Writes the names of all "@attribute <name> <type>" declarations of an ARFF
# file as a single comma-separated line (the CSV header).
#   $1 - ARFF file to read
#   $2 - file receiving the header line
extract_attributes() {
   # the name is the second space-separated field of a declaration;
   # paste -s joins the names, one per line, into a single line
   grep -e "^@attribute" -- "$1" | cut -f2 -d" " | paste -s -d"," - > "$2"
}

echo "Extracting data..."
extract_data "$ARFF" "$DATA"

echo "Extracting attributes..."
extract_attributes "$ARFF" "$ATTS"

# create speaker/utterance file
#
# Builds the speaker CSV: a header line followed by one line per distinct
# combination of the first two columns of the data. Each line consists of
# the first column and a key made of the first two columns joined by "_"
# (e.g. "1,2,..." becomes "1,1_2").
#   $1 - file containing the comma-separated header line
#   $2 - file containing the comma-separated data rows
#   $3 - CSV file to write
create_speaker_file() {
   local atts=$1 data=$2 out=$3

   {
      cut -f1,2 -d"," -- "$atts"
      # only the data rows are de-duplicated, the header stays on top
      cut -f1,2 -d"," -- "$data" | sort -u
   } | awk -F"," '{ key = $0; gsub(/,/, "_", key); print $1 "," key }' > "$out"
}

echo "Creating Speaker/Utterance file..."
create_speaker_file "$ATTS" "$DATA" "$SPEAKER"

# create utterance/frame file
#
# Builds the utterance CSV: the header line followed by all data rows, each
# with its first column replaced by a key made of the first two columns
# joined by "_" (e.g. "1,2,3,0.5" becomes "1_2,2,3,0.5").
#   $1 - file containing the comma-separated header line
#   $2 - file containing the comma-separated data rows
#   $3 - CSV file to write
create_utterance_file() {
   local atts=$1 data=$2 out=$3

   cat -- "$atts" "$data" \
      | awk -F"," '
           # a line without any comma is used as key and as remainder alike
           NF < 2 { print $0 "," $0; next }
           # key, then everything after the first comma
           { print $1 "_" $2 "," substr($0, length($1) + 2) }
        ' > "$out"
}

echo "Creating Utterance/Frame file..."
create_utterance_file "$ATTS" "$DATA" "$UTTERANCE"

# clean up
#
# Deletes the intermediate files: the extracted data rows (DATA), the header
# line (ATTS) and the scratch files named after the two output files with a
# "1" or "2" appended. The output files themselves are kept.
# All paths are quoted so that a location containing whitespace is removed
# as one file instead of being split into several names.
remove_intermediate_files() {
   rm -f -- \
      "$DATA" \
      "$ATTS" \
      "${SPEAKER}1" \
      "${SPEAKER}2" \
      "${UTTERANCE}1" \
      "${UTTERANCE}2"
}

echo "Cleaning up..."
remove_intermediate_files
