/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.scoring.pipelines;

import java.io.Serializable;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Splits a document into regex-matched tokens, drops stop words, and combines
 * the surviving tokens into space-separated n-grams of every size between
 * {@code minNGrams} and {@code maxNGrams} (inclusive).
 *
 * <p>{@link #tokenList(String)} materialises all n-grams grouped by size, while
 * {@link #tokenStream(String)} produces them lazily, grouped by the token that
 * ends each n-gram. For {@code minNGrams >= 1} both yield the same n-grams with
 * the same multiplicities; only the order differs.
 */
public class Tokenizer implements Serializable {
    private static final long serialVersionUID = 0L;
    private final Pattern tokenPattern;
    private final Set<String> stopWords;
    private final boolean toLowerCase;
    private final int minNGrams;
    private final int maxNGrams;
    /**
     * Token expression matching runs of at least two word characters delimited
     * by word boundaries.
     *
     * <p>NOTE(review): in {@code java.util.regex} the embedded flag {@code (?u)}
     * enables UNICODE_CASE only; Unicode-aware {@code \w} would need
     * {@code (?U)}. Confirm that ASCII-only word matching is intended.
     */
    public static final String WORD_TOKEN_EXPRESSION = "(?u)\\b\\w\\w+\\b";

    /**
     * Creates a tokenizer.
     *
     * @param tokenExpression regular expression whose matches are the tokens
     * @param stopWords tokens to discard; lower-cased when {@code toLowerCase}
     *        is set; {@code null} is treated as "no stop words"
     * @param minNGrams smallest n-gram size to produce
     * @param maxNGrams largest n-gram size to produce; when smaller than
     *        {@code minNGrams} no n-gram is produced at all
     * @param toLowerCase whether documents and stop words are lower-cased
     *        (locale-independently) before matching
     */
    public Tokenizer(String tokenExpression, String[] stopWords, int minNGrams, int maxNGrams, boolean toLowerCase) {
        // Must be assigned first: normalize() reads it while stop words are loaded.
        this.toLowerCase = toLowerCase;
        this.tokenPattern = Pattern.compile(tokenExpression);
        this.stopWords = new HashSet<String>();
        if (stopWords != null) {
            for (String word : stopWords) {
                this.stopWords.add(this.normalize(word));
            }
        }
        this.minNGrams = minNGrams;
        this.maxNGrams = maxNGrams;
    }

    /**
     * Applies the configured case folding. {@link Locale#ROOT} is used so the
     * result does not depend on the JVM default locale (e.g. Turkish dotless i).
     */
    private String normalize(String text) {
        return this.toLowerCase ? text.toLowerCase(Locale.ROOT) : text;
    }

    /**
     * Returns every n-gram of the document, ordered by n-gram size and then by
     * position in the document.
     *
     * @param document text to tokenize; must not be {@code null}
     * @return a new mutable list of n-grams, tokens joined by a single space
     */
    public List<String> tokenList(String document) {
        List<String> tokens = new ArrayList<String>();
        Matcher matcher = this.tokenPattern.matcher(this.normalize(document));
        while (matcher.find()) {
            String token = matcher.group();
            if (!this.stopWords.contains(token)) {
                tokens.add(token);
            }
        }
        List<String> grams = new ArrayList<String>();
        for (int k = this.minNGrams; k <= this.maxNGrams; ++k) {
            for (int i = 0; i <= tokens.size() - k; ++i) {
                StringBuilder gram = new StringBuilder();
                for (int j = 0; j < k; ++j) {
                    if (j > 0) {
                        gram.append(' ');
                    }
                    gram.append(tokens.get(i + j));
                }
                grams.add(gram.toString());
            }
        }
        return grams;
    }

    /**
     * Returns a lazily evaluated view of the document's n-grams. Each call to
     * {@code iterator()} on the result re-tokenizes the document from the start.
     *
     * @param document text to tokenize; must not be {@code null}
     * @return an iterable over the n-grams, grouped by the token ending them
     */
    public Iterable<String> tokenStream(String document) {
        return new Tokens(this.normalize(document));
    }

    /**
     * Counts how many times each n-gram of {@link #tokenStream(String)} occurs.
     *
     * @param document text to tokenize; must not be {@code null}
     * @return a new mutable map from n-gram to its occurrence count
     */
    public Map<String, Double> tokenCounts(String document) {
        Map<String, Double> counts = new HashMap<String, Double>();
        for (String gram : this.tokenStream(document)) {
            Double seen = counts.get(gram);
            counts.put(gram, seen == null ? 1.0 : seen + 1.0);
        }
        return counts;
    }

    /** Iterable wrapper handing out a fresh {@link TokenIterator} per traversal. */
    private final class Tokens implements Iterable<String> {
        private final String document;

        private Tokens(String document) {
            this.document = document;
        }

        @Override
        public Iterator<String> iterator() {
            return new TokenIterator(this.document);
        }
    }

    /**
     * Streams n-grams by keeping, for every configured gram size, a sliding
     * window over the most recent accepted tokens. After each accepted token
     * the iterator yields the windows that are full, smallest size first.
     */
    private final class TokenIterator implements Iterator<String> {
        private final Matcher m;
        /** Last accepted token, or {@code null} once the document is exhausted. */
        private String next;
        /** One sliding window per gram size; index i pairs with gramSizes[i]. */
        private final List<ArrayDeque<String>> grams;
        private final int[] gramSizes;
        /** Index of the window that the next call to next() will emit. */
        private int gramIter = 0;

        TokenIterator(String document) {
            this.m = Tokenizer.this.tokenPattern.matcher(document);
            // An inverted range (max < min) yields no windows, hence an empty stream.
            int windowCount = Math.max(0, Tokenizer.this.maxNGrams - Tokenizer.this.minNGrams + 1);
            this.gramSizes = new int[windowCount];
            this.grams = new ArrayList<ArrayDeque<String>>(windowCount);
            for (int i = 0; i < windowCount; ++i) {
                this.grams.add(new ArrayDeque<String>());
                this.gramSizes[i] = Tokenizer.this.minNGrams + i;
            }
            this.scan();
        }

        /**
         * Consumes tokens until the smallest window holds a complete gram, or
         * the input ends (leaving {@code next} as {@code null}).
         */
        private void scan() {
            this.next = null;
            while (this.next == null && this.m.find()) {
                String token = this.m.group();
                if (Tokenizer.this.stopWords.contains(token)) {
                    continue;
                }
                for (int i = 0; i < this.gramSizes.length; ++i) {
                    ArrayDeque<String> window = this.grams.get(i);
                    window.add(token);
                    if (window.size() > this.gramSizes[i]) {
                        // Drop the oldest token so the window never exceeds its gram size.
                        window.pop();
                    }
                }
                // Only stop once the smallest gram is complete; otherwise a
                // shorter-than-minNGrams gram would be emitted.
                if (this.gramSizes.length > 0 && this.grams.get(0).size() >= this.gramSizes[0]) {
                    this.next = token;
                }
            }
        }

        /** Joins the tokens currently held in window {@code i} with single spaces. */
        private String makeGram(int i) {
            StringBuilder gram = new StringBuilder();
            boolean first = true;
            for (String token : this.grams.get(i)) {
                if (!first) {
                    gram.append(' ');
                }
                gram.append(token);
                first = false;
            }
            return gram.toString();
        }

        @Override
        public boolean hasNext() {
            return this.next != null || this.gramIter != 0;
        }

        @Override
        public String next() {
            if (!this.hasNext()) {
                throw new NoSuchElementException();
            }
            String gram = this.makeGram(this.gramIter);
            int nextIter = this.gramIter + 1;
            boolean lastWindow = this.gramIter == this.gramSizes.length - 1;
            if (lastWindow || this.grams.get(nextIter).size() < this.gramSizes[nextIter]) {
                // Larger windows are not full yet (or none remain): advance the input.
                this.gramIter = 0;
                this.scan();
            } else {
                this.gramIter = nextIter;
            }
            return gram;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}

