/*
 * Decompiled with CFR 0.152.
 */
package jsat.text;

import java.util.List;
import jsat.classifiers.CategoricalData;
import jsat.classifiers.ClassificationDataSet;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.text.HashedTextDataLoader;
import jsat.text.tokenizer.Tokenizer;
import jsat.text.wordweighting.WordWeighting;
import jsat.utils.IntList;

/**
 * Hashed text data loader that additionally records one integer class label
 * per added document and exposes the result as a {@link ClassificationDataSet}.
 *
 * <p>Subclasses must implement {@link #setLabelInfo()} to populate
 * {@link #labelInfo}, and must add documents through
 * {@link #addOriginalDocument(String, int)} so that every stored vector has a
 * matching entry in {@link #classLabels}.
 */
public abstract class ClassificationHashedTextDataLoader
extends HashedTextDataLoader {
    private static final long serialVersionUID = -1350008848821058696L;

    /**
     * Class label of each added document; entry {@code i} is paired with
     * vector {@code i} when the data set is built.
     */
    protected List<Integer> classLabels = new IntList();

    /**
     * Description of the target class variable. Read when validating labels
     * and when constructing the data set; expected to be assigned by
     * {@link #setLabelInfo()}.
     */
    protected CategoricalData labelInfo;

    /**
     * Creates a loader using a default dimension size of {@code 0x400000}
     * (2<sup>22</sup>).
     *
     * @param tokenizer the tokenizer forwarded to the superclass
     * @param weighting the word weighting forwarded to the superclass
     */
    public ClassificationHashedTextDataLoader(Tokenizer tokenizer, WordWeighting weighting) {
        this(0x400000, tokenizer, weighting);
    }

    /**
     * Creates a loader with an explicit dimension size.
     *
     * @param dimensionSize the dimension size forwarded to the superclass
     * @param tokenizer the tokenizer forwarded to the superclass
     * @param weighting the word weighting forwarded to the superclass
     */
    public ClassificationHashedTextDataLoader(int dimensionSize, Tokenizer tokenizer, WordWeighting weighting) {
        super(dimensionSize, tokenizer, weighting);
    }

    /**
     * Hook for subclasses to initialise {@link #labelInfo}. Invoked by
     * {@link #getDataSet()} before {@code initialLoad()} on the first call.
     */
    protected abstract void setLabelInfo();

    /**
     * Unsupported: a document cannot be added without a class label.
     *
     * @param text ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    protected void addOriginalDocument(String text) {
        throw new UnsupportedOperationException("addOriginalDocument(String text, int label) should be used instead");
    }

    /**
     * Adds a document together with its class label.
     *
     * @param text the raw document text, passed to the superclass
     * @param label the class label; must satisfy
     *        {@code 0 <= label < labelInfo.getNumOfCategories()}
     * @throws IllegalArgumentException if {@code label} is negative or not
     *         smaller than the number of categories in {@link #labelInfo}
     */
    protected void addOriginalDocument(String text, int label) {
        // Negative labels are rejected as well as too-large ones: either would
        // otherwise be stored here and only surface later when the data set is built.
        if (label < 0 || label >= this.labelInfo.getNumOfCategories()) {
            throw new IllegalArgumentException("Invalid label given");
        }
        super.addOriginalDocument(text);
        // Recorded only after the superclass call returns, so a failure there
        // does not leave a label without a matching document.
        this.classLabels.add(label);
    }

    /**
     * Builds a classification data set from the loaded documents.
     *
     * <p>If adding has not yet been finished ({@code noMoreAdding} is false),
     * this first calls {@link #setLabelInfo()}, {@code initialLoad()} and
     * {@code finishAdding()}, in that order. Each stored vector is then added
     * as a data point with no categorical features and the class label stored
     * at the same index.
     *
     * <p>NOTE(review): the numeric dimension is read from the first stored
     * vector, so this presumably fails with an index error when no documents
     * were loaded - confirm against the superclass.
     *
     * @return a new data set containing every loaded document and its label
     */
    @Override
    public ClassificationDataSet getDataSet() {
        if (!this.noMoreAdding) {
            this.setLabelInfo();
            this.initialLoad();
            this.finishAdding();
        }
        // The first vector's length defines the number of numeric features.
        ClassificationDataSet cds = new ClassificationDataSet(((SparseVector)this.vectors.get(0)).length(), new CategoricalData[0], this.labelInfo);
        for (int i = 0; i < this.vectors.size(); ++i) {
            cds.addDataPoint((Vec)this.vectors.get(i), new int[0], this.classLabels.get(i));
        }
        return cds;
    }
}

