/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.scoring.pipelines;

import com.dataiku.scoring.pipelines.Processor;
import com.dataiku.scoring.pipelines.Tokenizer;
import com.dataiku.scoring.util.RawObservation;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * {@link Processor} that writes TF-IDF weighted features for a set of text columns.
 *
 * <p>For every configured column, the text is passed to that column's {@link Tokenizer};
 * each returned token that belongs to the column's vocabulary is weighted by
 * {@code count * idf}, scaled according to the column's {@link Normalization}, and stored in
 * the observation under the output name associated with the token. Tokens outside the
 * vocabulary are ignored, and vocabulary terms that do not occur in the text are not written.
 */
public class TfidfVectorizer
implements Processor {
    private static final long serialVersionUID = 0L;
    private final Tokenizer[] tokenizers;
    private final String[] columns;
    /** One map per column: vocabulary term -> IDF weight. */
    private final List<Map<String, Double>> vocabularyMaps;
    private final Normalization[] normalization;
    /** One map per column: vocabulary term -> name under which the feature is written. */
    private final List<Map<String, String>> outputNames;

    /**
     * Builds the vectorizer from parallel, per-column arrays (index {@code i} of every
     * argument describes column {@code columns[i]}).
     *
     * @param columns      names of the input columns read by {@link #process(RawObservation)}
     * @param tokenizer    tokenizer used for each column
     * @param vocabularies vocabulary terms of each column
     * @param idf          IDF weight of each vocabulary term ({@code idf[i][j]} belongs to
     *                     {@code vocabularies[i][j]})
     * @param norm         normalization applied to each column
     * @param outputNames  output name of each vocabulary term ({@code outputNames[i][j]}
     *                     belongs to {@code vocabularies[i][j]})
     * @throws IllegalArgumentException if {@code columns} and {@code vocabularies} differ in
     *         length, if {@code tokenizer} or {@code norm} has fewer entries than there are
     *         columns, or if a row of {@code idf} or {@code outputNames} has fewer entries
     *         than the matching vocabulary
     */
    public TfidfVectorizer(String[] columns, Tokenizer[] tokenizer, String[][] vocabularies, double[][] idf, Normalization[] norm, String[][] outputNames) {
        if (columns.length != vocabularies.length) {
            throw new IllegalArgumentException("Columns and values must have the same length");
        }
        int columnCount = columns.length;
        // process() indexes these two arrays for every column, so a short (or missing) array
        // can never be scored: fail at construction time instead of on every observation.
        requireAtLeast("tokenizer", lengthOf(tokenizer), columnCount);
        requireAtLeast("norm", lengthOf(norm), columnCount);

        this.tokenizers = tokenizer;
        this.normalization = norm;
        this.columns = columns;
        this.vocabularyMaps = new ArrayList<Map<String, Double>>(columnCount);
        this.outputNames = new ArrayList<Map<String, String>>(columnCount);
        for (int i = 0; i < columnCount; ++i) {
            String[] terms = vocabularies[i];
            // Rows are only needed up to the vocabulary size; an empty vocabulary needs none.
            requireAtLeast("idf[" + i + "]", rowLength(idf, i), terms.length);
            requireAtLeast("outputNames[" + i + "]", rowLength(outputNames, i), terms.length);

            Map<String, Double> idfByTerm = new HashMap<String, Double>();
            Map<String, String> nameByTerm = new HashMap<String, String>();
            for (int j = 0; j < terms.length; ++j) {
                idfByTerm.put(terms[j], idf[i][j]);
                nameByTerm.put(terms[j], outputNames[i][j]);
            }
            this.vocabularyMaps.add(idfByTerm);
            this.outputNames.add(nameByTerm);
        }
    }

    /**
     * @return a copy of the per-column tokenizer array
     */
    public Tokenizer[] getTokenizers() {
        return Arrays.copyOf(this.tokenizers, this.tokenizers.length);
    }

    /**
     * @return a copy of the input column names
     */
    public String[] getColumns() {
        return Arrays.copyOf(this.columns, this.columns.length);
    }

    /**
     * Returns the IDF weights of every column.
     *
     * @return a read-only list with one read-only {@code term -> idf} map per column; attempts
     *         to modify the list or its maps throw {@link UnsupportedOperationException}
     */
    public List<Map<String, Double>> getIdfs() {
        // Wrap instead of handing out the internal maps, so callers cannot alter the model.
        List<Map<String, Double>> views = new ArrayList<Map<String, Double>>(this.vocabularyMaps.size());
        for (Map<String, Double> idfByTerm : this.vocabularyMaps) {
            views.add(Collections.unmodifiableMap(idfByTerm));
        }
        return Collections.unmodifiableList(views);
    }

    /**
     * Returns the vocabulary terms of every column.
     *
     * @return a new list with one read-only set of terms per column
     */
    public List<Set<String>> getVocabularies() {
        List<Set<String>> result = new ArrayList<Set<String>>(this.vocabularyMaps.size());
        for (Map<String, Double> idfByTerm : this.vocabularyMaps) {
            // keySet() is a live view whose remove() would delete IDF entries: expose it read-only.
            result.add(Collections.unmodifiableSet(idfByTerm.keySet()));
        }
        return result;
    }

    /**
     * Writes the TF-IDF features of every configured column into {@code data}.
     *
     * <p>A {@code null} column value is treated as the empty string. If a column holds a
     * non-{@code String} value, an error is recorded on {@code data} and processing stops;
     * features already written for earlier columns are left in place.
     *
     * @param data observation to read the text columns from and to write the features to
     */
    @Override
    public void process(RawObservation data) {
        for (int i = 0; i < this.columns.length; ++i) {
            Object text = data.get(this.columns[i]);
            if (text == null) {
                text = "";
            } else if (!(text instanceof String)) {
                data.setError("Text expected but " + String.valueOf(text.getClass()) + " found.");
                return;
            }
            Map<String, Double> counts = this.tokenizers[i].tokenCounts((String) text);
            Map<String, Double> idfByTerm = this.vocabularyMaps.get(i);
            Map<String, String> nameByTerm = this.outputNames.get(i);
            double scale = TfidfVectorizer.normalizationScale(counts, idfByTerm, this.normalization[i]);
            for (Map.Entry<String, Double> e : counts.entrySet()) {
                Double idf = idfByTerm.get(e.getKey());
                if (idf == null) {
                    // Token is not part of this column's vocabulary.
                    continue;
                }
                double val = scale * e.getValue() * idf;
                data.put(nameByTerm.get(e.getKey()), val);
            }
        }
    }

    /**
     * Computes the factor by which the {@code count * idf} weights of one text are multiplied.
     *
     * @param counts    token counts of the text
     * @param idfByTerm IDF weight of every vocabulary term
     * @param mode      normalization to apply
     * @return {@code 1 / norm}, where the norm is the sum of absolute weights for
     *         {@link Normalization#L1} and the square root of the sum of squared weights for
     *         {@link Normalization#L2}; {@code 1.0} when the accumulated sum is not positive,
     *         which is always the case for {@link Normalization#NONE}
     */
    private static double normalizationScale(Map<String, Double> counts, Map<String, Double> idfByTerm, Normalization mode) {
        double norm = 0.0;
        for (Map.Entry<String, Double> e : counts.entrySet()) {
            Double idf = idfByTerm.get(e.getKey());
            if (idf == null) {
                continue;
            }
            double val = e.getValue() * idf;
            switch (mode) {
                case L1: {
                    norm += Math.abs(val);
                    break;
                }
                case L2: {
                    norm += val * val;
                    break;
                }
                case NONE:
                default: {
                    break;
                }
            }
        }
        if (norm <= 0.0) {
            // Nothing accumulated (no known token, or no normalization): leave weights unscaled.
            norm = 1.0;
        }
        if (mode == Normalization.L2) {
            norm = Math.sqrt(norm);
        }
        return 1.0 / norm;
    }

    /** Throws {@link IllegalArgumentException} when {@code actual} is smaller than {@code required}. */
    private static void requireAtLeast(String what, int actual, int required) {
        if (actual < required) {
            throw new IllegalArgumentException(what + " has " + actual + " entries but at least " + required + " are required");
        }
    }

    /** Length of {@code array}, counting {@code null} as empty. */
    private static int lengthOf(Object[] array) {
        return array == null ? 0 : array.length;
    }

    /** Length of {@code rows[index]}, counting a missing or {@code null} row as empty. */
    private static int rowLength(double[][] rows, int index) {
        if (rows == null || index >= rows.length || rows[index] == null) {
            return 0;
        }
        return rows[index].length;
    }

    /** Length of {@code rows[index]}, counting a missing or {@code null} row as empty. */
    private static int rowLength(String[][] rows, int index) {
        if (rows == null || index >= rows.length || rows[index] == null) {
            return 0;
        }
        return rows[index].length;
    }

    /**
     * @return {@code TfidfVectorizer(...)} listing the input columns separated by {@code " ; "}
     */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder().append("TfidfVectorizer(");
        for (int i = 0; i < this.columns.length; ++i) {
            if (i > 0) {
                s.append(" ; ");
            }
            s.append(this.columns[i]);
        }
        return s.append(")").toString();
    }

    /** How the {@code count * idf} weights of one text are scaled before being written. */
    public static enum Normalization {
        /** Weights are written unscaled. */
        NONE,
        /** Weights are divided by the sum of their absolute values. */
        L1,
        /** Weights are divided by the square root of the sum of their squares. */
        L2;

    }
}

