/*
 * Decompiled with CFR 0.152.
 */
package jsat.text;

import java.util.ArrayList;
import java.util.List;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.text.TextVectorCreator;
import jsat.text.tokenizer.Tokenizer;
import jsat.text.wordweighting.WordWeighting;

/**
 * Creates vectors from text using the hashing trick: each token produced by
 * the tokenizer is mapped to an index in a fixed-size vector by its
 * {@link String#hashCode() hash code}, so no vocabulary has to be stored.
 * Distinct tokens may collide on the same index.
 */
public class HashedTextVectorCreator
implements TextVectorCreator {
    private static final long serialVersionUID = 1081388790985568192L;
    /** Length of every vector produced; also the number of hash buckets. */
    private int dimensionSize;
    /** Splits the raw input text into tokens. */
    private Tokenizer tokenizer;
    /** Applied to the vector of raw token counts before it is returned. */
    private WordWeighting weighting;

    /**
     * Creates a new hashed text vector creator.
     *
     * @param dimensionSize the length of the vectors to create; must be greater than 1
     * @param tokenizer the tokenizer used to break input text into tokens
     * @param weighting the weighting applied to the raw token counts
     * @throws ArithmeticException if {@code dimensionSize} is less than or equal to 1
     */
    public HashedTextVectorCreator(int dimensionSize, Tokenizer tokenizer, WordWeighting weighting) {
        if (dimensionSize <= 1) {
            // The message states the constraint that is actually enforced (1 is rejected too).
            throw new ArithmeticException("Vector dimension must be greater than 1, but was " + dimensionSize);
        }
        this.dimensionSize = dimensionSize;
        this.tokenizer = tokenizer;
        this.weighting = weighting;
    }

    /**
     * Converts the given text into a vector, allocating fresh temporary
     * buffers for the tokenizer.
     *
     * @param input the text to convert
     * @return the weighted vector of hashed token counts
     */
    @Override
    public Vec newText(String input) {
        return this.newText(input, new StringBuilder(), new ArrayList<String>());
    }

    /**
     * Converts the given text into a vector, handing the supplied buffers to
     * the tokenizer so callers can reuse them across invocations.
     *
     * @param input the text to convert
     * @param workSpace scratch buffer passed through to the tokenizer
     * @param storageSpace list passed to the tokenizer; every element it holds
     *        after tokenization is counted as a token (presumably the tokenizer
     *        fills it with the tokens of {@code input} - confirm against the
     *        {@code Tokenizer} contract)
     * @return the weighted vector of hashed token counts
     */
    @Override
    public Vec newText(String input, StringBuilder workSpace, List<String> storageSpace) {
        this.tokenizer.tokenize(input, workSpace, storageSpace);
        SparseVector vec = new SparseVector(this.dimensionSize);
        for (String word : storageSpace) {
            // Reduce first, then take the absolute value. Math.abs(hash) alone
            // stays negative for Integer.MIN_VALUE, which would produce a
            // negative index. Because the remainder lies strictly between
            // -dimensionSize and dimensionSize, its absolute value is always a
            // valid index, and for every other hash value it equals the
            // previous Math.abs(hash) % dimensionSize mapping.
            int index = Math.abs(word.hashCode() % this.dimensionSize);
            vec.increment(index, 1.0);
        }
        this.weighting.applyTo(vec);
        return vec;
    }
}

