/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.token;

import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.token.Anything;
import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.token.QuantifiedToken;
import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.token.SimpleToken;
import com.dataiku.dip.shaker.processors.transform.interactivedatapreparation.token.Token;
import com.dataiku.dss.shadelib.com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Pattern;

/**
 * Turns sample text into lists of regex-fragment tokens.
 *
 * <p>Two granularities are offered: {@link #buildPrecise(String)} works character by
 * character and collapses runs of the same character class into one quantified token,
 * while {@link #build(String)} proposes every whole-word pattern that is compatible
 * with the given word.
 *
 * <p>NOTE(review): the integer arguments passed to the {@code SimpleToken} and
 * {@code QuantifiedToken} constructors are not explained in this file. The second
 * argument is presumably a complexity score (see {@link #MEAN_TOKEN_COMPLEXITY}) and the
 * third argument of {@code QuantifiedToken} presumably the initial quantifier - confirm
 * against those classes.
 */
public class TokenBuilder {
    /**
     * Candidate patterns tried against a whole word by {@link #build(String)}, in the
     * order in which they are appended to its result.
     */
    static final List<SimpleToken> WHOLE_WORD_TOKENS = Lists.newArrayList(
            new SimpleToken("\\pP", 2),
            new SimpleToken("[a-zA-Z0-9_]+", 2),
            new SimpleToken("[a-z]+", 3),
            new SimpleToken("[A-Z]+", 3),
            new SimpleToken("[0-9]+", 3),
            new SimpleToken("[a-zA-Z]+", 3),
            new SimpleToken("[A-Z][a-z]+", 3),
            new SimpleToken("[\\p{L}\\p{N}\\pP]+", 3),
            new SimpleToken("\\p{Ll}+", 4),
            new SimpleToken("\\p{Lu}+", 4),
            new SimpleToken("\\p{N}+", 4),
            new SimpleToken("\\p{L}+", 4),
            new SimpleToken("\\p{Lu}\\p{Ll}+", 4));

    /**
     * Single-character classes tried, in order, by {@link #buildCharToken(char)}; the
     * first compatible entry wins, so the ASCII classes take precedence over the Unicode
     * ones listed after them.
     */
    static final List<QuantifiedToken> CHAR_TOKENS = Lists.newArrayList(
            new QuantifiedToken("[A-Z]", 1, 1),
            new QuantifiedToken("[a-z]", 1, 1),
            new QuantifiedToken("[0-9]", 1, 1),
            new QuantifiedToken("\\pP", 1, 1),
            new QuantifiedToken("\\p{Lu}", 2, 1),
            new QuantifiedToken("\\p{Ll}", 2, 1),
            new QuantifiedToken("\\p{N}", 2, 1));

    public static final int MEAN_TOKEN_COMPLEXITY = 4;

    /**
     * Matches one character that {@link #betterEscape(String)} prefixes with a backslash:
     * the listed regex punctuation (including comma and hash) or any whitespace character.
     */
    public static final Pattern REGEX_ESCAPE = Pattern.compile("[-\\[\\]{}()*+?.,^$\\\\|#\\s]");

    /**
     * Returns the token describing a single character.
     *
     * @param letter the character to classify
     * @return the first entry of {@link #CHAR_TOKENS} that reports itself compatible with
     *         {@code letter} (the shared instance, not a copy), or a new token whose regex
     *         is the escaped character itself when no entry matches
     */
    QuantifiedToken buildCharToken(char letter) {
        for (QuantifiedToken token : CHAR_TOKENS) {
            if (token.compatible(letter)) {
                return token;
            }
        }
        // No predefined class matched: fall back to the literal character.
        return new QuantifiedToken(TokenBuilder.betterEscape(Character.toString(letter)), 2, 1);
    }

    /**
     * Tokenizes {@code sentence} one {@code char} at a time, merging consecutive
     * characters whose tokens share the same regex into a single token with an
     * incremented quantifier.
     *
     * @param sentence the text to tokenize; must not be {@code null}
     * @return a new mutable list of tokens in input order; empty when {@code sentence}
     *         is empty
     */
    public List<QuantifiedToken> buildPrecise(String sentence) {
        List<QuantifiedToken> tokens = new ArrayList<QuantifiedToken>();
        for (char letter : sentence.toCharArray()) {
            QuantifiedToken next = this.buildCharToken(letter);
            int lastIndex = tokens.size() - 1;
            if (lastIndex >= 0 && next.regex.equals(tokens.get(lastIndex).regex)) {
                // Same class as the previous character: widen the previous token in place
                // of appending a duplicate.
                QuantifiedToken last = tokens.get(lastIndex);
                tokens.set(lastIndex, last.withQuantifier(last.quantifier + 1));
            } else {
                tokens.add(next);
            }
        }
        return tokens;
    }

    /**
     * Lists the candidate tokens for a whole word.
     *
     * <p>The result always starts with a token for the escaped literal word, followed by
     * {@code Anything(true)} and {@code Anything(false)}, and then every entry of
     * {@link #WHOLE_WORD_TOKENS} that reports itself compatible with {@code word}.
     *
     * @param word the word to describe; must not be {@code null}
     * @return a new mutable list of candidate tokens
     */
    public List<Token> build(String word) {
        List<Token> tokens = new ArrayList<Token>();
        tokens.add(new SimpleToken(TokenBuilder.betterEscape(word), 4));
        tokens.add(new Anything(true));
        tokens.add(new Anything(false));
        for (Token token : WHOLE_WORD_TOKENS) {
            if (token.compatible(word)) {
                tokens.add(token);
            }
        }
        return tokens;
    }

    /**
     * Escapes {@code sentence} for literal use inside a regex by inserting a backslash
     * before every character matched by {@link #REGEX_ESCAPE}.
     *
     * @param sentence the raw text; must not be {@code null}
     * @return the escaped text
     */
    static String betterEscape(String sentence) {
        // In the replacement string, the doubled backslash is one literal backslash and
        // $0 is the matched character.
        return REGEX_ESCAPE.matcher(sentence).replaceAll("\\\\$0");
    }
}

