/*
 * Decompiled with CFR 0.152.
 */
package jsat.datatransform.featureselection;

import jsat.DataSet;
import jsat.classifiers.CategoricalData;
import jsat.classifiers.ClassificationDataSet;
import jsat.classifiers.DataPoint;
import jsat.datatransform.DataTransformFactoryParm;
import jsat.datatransform.RemoveAttributeTransform;
import jsat.exceptions.FailedToFitException;
import jsat.linear.IndexValue;
import jsat.linear.Vec;
import jsat.utils.IndexTable;
import jsat.utils.IntSet;

/**
 * Feature selection transform that scores every considered feature by its
 * mutual information with the class label and removes the lowest scoring
 * ones, keeping (at most) the requested number of considered features.
 * <p>
 * Categorical features are scored over all of their category values.
 * Numeric features are only considered when the numeric handling is not
 * {@link NumericalHandeling#NONE}; under {@link NumericalHandeling#BINARY}
 * each numeric feature is reduced to a two-valued variable: "returned by the
 * vector's iterator" versus "not returned".
 * NOTE(review): the vector iterator presumably yields only the non-zero
 * entries, making this a zero / non-zero split - confirm against {@code Vec}.
 */
public class MutualInfoFS
extends RemoveAttributeTransform {
    private static final long serialVersionUID = -4394620220403363542L;

    /**
     * Creates a new selector that treats numeric features with
     * {@link NumericalHandeling#BINARY}.
     *
     * @param dataSet the classification data used to score the features
     * @param featureCount the number of features to keep, must be positive
     */
    public MutualInfoFS(ClassificationDataSet dataSet, int featureCount) {
        this(dataSet, featureCount, NumericalHandeling.BINARY);
    }

    /**
     * Copy constructor, used by {@link #clone()}.
     *
     * @param toCopy the transform to copy
     */
    protected MutualInfoFS(MutualInfoFS toCopy) {
        super(toCopy);
    }

    /**
     * Scores the features of the given data set and configures this transform
     * to remove the lowest scoring ones.
     *
     * @param dataSet the classification data used to score the features
     * @param featureCount the number of considered features to keep, must be
     * positive. If it is not smaller than the number of considered features,
     * nothing is removed.
     * @param numericHandling how numeric features are treated. With
     * {@link NumericalHandeling#NONE} only categorical features are scored
     * and no numeric feature is removed.
     * @throws IllegalArgumentException if {@code featureCount} is not positive
     */
    public MutualInfoFS(ClassificationDataSet dataSet, int featureCount, NumericalHandeling numericHandling) {
        if (featureCount <= 0) {
            throw new IllegalArgumentException("Number of features to select must be positive");
        }
        double[] classPriors = dataSet.getPriors();
        int numClasses = classPriors.length;
        double[] logClassPriors = new double[numClasses];
        for (int c = 0; c < numClasses; ++c) {
            logClassPriors[c] = Math.log(classPriors[c]);
        }

        // Considered features are laid out as: all categorical features first,
        // then (optionally) the numeric features offset by numCatVars.
        int numCatVars = dataSet.getNumCategoricalVars();
        int consideredCount = numCatVars;
        if (numericHandling != NumericalHandeling.NONE) {
            consideredCount = dataSet.getNumFeatures();
        }

        // featWeights[feature][value]: total weight of points taking that value.
        // jointWeights[feature][value][class]: the same, split by class label.
        double[][] featWeights = new double[consideredCount][];
        double[][][] jointWeights = new double[consideredCount][][];
        CategoricalData[] catInfo = dataSet.getCategories();
        for (int i = 0; i < consideredCount; ++i) {
            if (i < numCatVars) {
                int options = catInfo[i].getNumOfCategories();
                jointWeights[i] = new double[options][numClasses];
                featWeights[i] = new double[options];
            } else {
                // Numeric features only track the "present" side; the
                // complement is derived from the per-class weight totals.
                jointWeights[i] = new double[1][numClasses];
                featWeights[i] = new double[1];
            }
        }

        // Single pass over the data accumulating weighted counts.
        double weightSum = 0.0;
        double[] classWeight = new double[numClasses];
        int sampleCount = dataSet.getSampleSize();
        for (int i = 0; i < sampleCount; ++i) {
            DataPoint dp = dataSet.getDataPoint(i);
            int trueClass = dataSet.getDataPointCategory(i);
            double weight = dp.getWeight();
            weightSum += weight;
            classWeight[trueClass] += weight;

            int[] catVals = dp.getCategoricalValues();
            for (int j = 0; j < catVals.length; ++j) {
                featWeights[j][catVals[j]] += weight;
                jointWeights[j][catVals[j]][trueClass] += weight;
            }

            if (numericHandling == NumericalHandeling.BINARY) {
                Vec numeric = dp.getNumericalValues();
                for (IndexValue iv : numeric) {
                    int feature = iv.getIndex() + numCatVars;
                    featWeights[feature][0] += weight;
                    jointWeights[feature][0][trueClass] += weight;
                }
            }
        }

        // Mutual information score of every considered feature.
        double[] mis = new double[consideredCount];
        for (int i = 0; i < consideredCount; ++i) {
            if (i < numCatVars) {
                mis[i] = categoricalMutualInfo(featWeights[i], jointWeights[i], logClassPriors, weightSum);
            } else {
                mis[i] = binaryMutualInfo(featWeights[i][0], jointWeights[i][0], classWeight, logClassPriors, weightSum);
            }
        }

        // Drop the first (consideredCount - featureCount) features of the sorted order.
        // NOTE(review): presumably IndexTable orders ascending, so these are
        // the lowest scoring features - confirm against IndexTable.
        IndexTable sortedOrder = new IndexTable(mis);
        IntSet catToRemove = new IntSet();
        IntSet numToRemove = new IntSet();
        int removeCount = consideredCount - featureCount;
        for (int i = 0; i < removeCount; ++i) {
            int removingIndex = sortedOrder.index(i);
            if (removingIndex < numCatVars) {
                catToRemove.add(removingIndex);
            } else {
                numToRemove.add(removingIndex - numCatVars);
            }
        }
        this.setUp(dataSet, catToRemove, numToRemove);
    }

    /**
     * Computes the mutual information between one categorical feature and the
     * class label from weighted counts. Terms with zero probability are skipped.
     *
     * @param valueWeights total weight observed for each category value
     * @param jointWeights weight observed for each (category value, class) pair
     * @param logClassPriors natural log of each class prior
     * @param weightSum total weight of all data points
     * @return the mutual information score
     */
    private static double categoricalMutualInfo(double[] valueWeights, double[][] jointWeights, double[] logClassPriors, double weightSum) {
        double mi = 0.0;
        for (int value = 0; value < jointWeights.length; ++value) {
            double valuePrior = valueWeights[value] / weightSum;
            if (valuePrior == 0.0) {
                continue;
            }
            double logValuePrior = Math.log(valuePrior);
            for (int c = 0; c < logClassPriors.length; ++c) {
                double jp = jointWeights[value][c] / weightSum;
                if (jp == 0.0) {
                    continue;
                }
                mi += jp * (Math.log(jp) - logValuePrior - logClassPriors[c]);
            }
        }
        return mi;
    }

    /**
     * Computes the mutual information between one binarized numeric feature
     * and the class label. Each joint probability is paired with the marginal
     * of the same feature state (present with present, absent with absent).
     *
     * @param presentWeight total weight of points where the feature was present
     * @param presentJoint weight of points where the feature was present, per class
     * @param classWeight total weight of the points of each class
     * @param logClassPriors natural log of each class prior
     * @param weightSum total weight of all data points
     * @return the mutual information score
     */
    private static double binaryMutualInfo(double presentWeight, double[] presentJoint, double[] classWeight, double[] logClassPriors, double weightSum) {
        double presentPrior = presentWeight / weightSum;
        double absentPrior = 1.0 - presentPrior;
        double mi = 0.0;
        for (int c = 0; c < logClassPriors.length; ++c) {
            double jpPresent = presentJoint[c] / weightSum;
            // Whatever weight of this class was not "present" must be "absent".
            double jpAbsent = (classWeight[c] - presentJoint[c]) / weightSum;
            // Strictly-positive guards: a rounding residue below zero would
            // otherwise turn the whole score into NaN through Math.log.
            if (jpPresent > 0.0 && presentPrior > 0.0) {
                mi += jpPresent * (Math.log(jpPresent) - Math.log(presentPrior) - logClassPriors[c]);
            }
            if (jpAbsent > 0.0 && absentPrior > 0.0) {
                mi += jpAbsent * (Math.log(jpAbsent) - Math.log(absentPrior) - logClassPriors[c]);
            }
        }
        return mi;
    }

    @Override
    public MutualInfoFS clone() {
        return new MutualInfoFS(this);
    }

    /**
     * Factory that builds {@link MutualInfoFS} transforms with a stored
     * feature count and numeric handling.
     */
    public static class MutualInfoFSFactory
    extends DataTransformFactoryParm {
        private int featureCount;
        private NumericalHandeling handling;

        /**
         * Creates a factory that uses {@link NumericalHandeling#BINARY}.
         *
         * @param featureCount the number of features to keep, must be positive
         */
        public MutualInfoFSFactory(int featureCount) {
            this(featureCount, NumericalHandeling.BINARY);
        }

        /**
         * Creates a factory with the given settings.
         *
         * @param featureCount the number of features to keep, must be positive
         * @param handling how numeric features are treated
         */
        public MutualInfoFSFactory(int featureCount, NumericalHandeling handling) {
            this.setFeatureCount(featureCount);
            this.setHandling(handling);
        }

        /**
         * Copy constructor.
         *
         * @param toCopy the factory to copy the settings from
         */
        public MutualInfoFSFactory(MutualInfoFSFactory toCopy) {
            this(toCopy.featureCount, toCopy.handling);
        }

        /**
         * Sets the number of features to keep.
         *
         * @param featureCount the number of features to keep
         * @throws IllegalArgumentException if {@code featureCount} is less than 1
         */
        public void setFeatureCount(int featureCount) {
            if (featureCount < 1) {
                throw new IllegalArgumentException("Number of features must be positive, not " + featureCount);
            }
            this.featureCount = featureCount;
        }

        /**
         * @return the number of features to keep
         */
        public int getFeatureCount() {
            return this.featureCount;
        }

        /**
         * Sets how numeric features are treated.
         *
         * @param handling the numeric handling to use
         */
        public void setHandling(NumericalHandeling handling) {
            this.handling = handling;
        }

        /**
         * @return how numeric features are treated
         */
        public NumericalHandeling getHandling() {
            return this.handling;
        }

        /**
         * Builds a transform fitted to the given data set.
         *
         * @param dataset the data set to fit, must be a {@link ClassificationDataSet}
         * @return the fitted transform
         * @throws FailedToFitException if the data set is not a classification data set
         */
        @Override
        public MutualInfoFS getTransform(DataSet dataset) {
            if (!(dataset instanceof ClassificationDataSet)) {
                throw new FailedToFitException("The given data set was not a classification data set");
            }
            ClassificationDataSet cds = (ClassificationDataSet)dataset;
            return new MutualInfoFS(cds, this.featureCount, this.handling);
        }

        @Override
        public MutualInfoFSFactory clone() {
            return new MutualInfoFSFactory(this);
        }
    }

    /**
     * The ways numeric features can be treated when scoring.
     */
    public static enum NumericalHandeling {
        /** Numeric features are not scored and never removed. */
        NONE,
        /** Numeric features are scored as two-valued (present / absent) variables. */
        BINARY;

    }
}

