/*
 * Decompiled with CFR 0.152.
 */
package sklearn2pmml.feature_extraction.text;

import com.google.common.base.Joiner;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.dmg.pmml.TextIndex;
import sklearn.feature_extraction.text.Tokenizer;

/**
 * Tokenizer whose configuration is a single word regular expression, stored
 * under the {@code "word_re"} attribute.
 */
public class Matcher
extends Tokenizer {
    /**
     * Creates an instance identified by the module
     * {@code sklearn2pmml.feature_extraction.text} and the name {@code Matcher}.
     */
    public Matcher() {
        this("sklearn2pmml.feature_extraction.text", "Matcher");
    }

    /**
     * Creates an instance, forwarding both identifiers to the superclass.
     *
     * @param module the module identifier passed to the superclass constructor
     * @param name the name identifier passed to the superclass constructor
     */
    public Matcher(String module, String name) {
        super(module, name);
    }

    /**
     * Turns tokenization on for the given text index and assigns it this
     * object's word regular expression.
     *
     * @param textIndex the text index to configure
     * @return the value returned by the final setter call on the text index
     */
    @Override
    public TextIndex configure(TextIndex textIndex) {
        // Read the expression first, before touching the text index.
        String expression = this.getWordRE();
        TextIndex tokenizing = textIndex.setTokenize(Boolean.TRUE);
        return tokenizing.setWordRE(expression);
    }

    /**
     * Builds a regular expression that matches any of the relevant stop words
     * between word boundaries.
     *
     * <p>Only stop words in which the word regular expression finds a match
     * are kept. The kept words are joined with {@code |} as-is (no escaping)
     * and wrapped as {@code \b(...)\b}. When the word regular expression
     * starts with {@code (?u)}, the result carries the same prefix.
     *
     * @param stopWords the candidate stop words
     * @return the stop words expression, or {@code null} when no candidate is kept
     */
    @Override
    public String formatStopWordsRE(List<String> stopWords) {
        String expression = this.getWordRE();
        Pattern wordPattern = Pattern.compile(expression);

        List<String> retained = stopWords.stream()
            .filter(wordPattern.asPredicate())
            .collect(Collectors.toList());
        if (retained.isEmpty()) {
            return null;
        }

        // Propagate the flag prefix of the word expression, if present.
        String prefix;
        if (expression.startsWith("(?u)")) {
            prefix = "(?u)";
        } else {
            prefix = "";
        }

        String alternation = Joiner.on("|").join(retained);
        return prefix + "\\b(" + alternation + ")\\b";
    }

    /**
     * Restores the state of this object from a word regular expression.
     *
     * @param wordRE the word regular expression to store
     */
    public void __setstate__(String wordRE) {
        this.setWordRE(wordRE);
    }

    /**
     * Returns the string stored under the {@code "word_re"} attribute.
     *
     * @return the word regular expression
     */
    public String getWordRE() {
        return this.getString("word_re");
    }

    /**
     * Stores the given value under the {@code "word_re"} attribute.
     *
     * @param wordRE the word regular expression to store
     * @return this object, to allow call chaining
     */
    public Matcher setWordRE(String wordRE) {
        this.put("word_re", wordRE);
        return this;
    }
}

