/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.scoring.pipelines;

import com.dataiku.scoring.pipelines.Processor;
import com.dataiku.scoring.pipelines.Tokenizer;
import com.dataiku.scoring.util.RawObservation;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Processor that replaces free-text columns with hashed token-count features.
 *
 * <p>For every configured column, the text value is split into token counts by
 * the tokenizer at the same index, each token is mapped to a bucket in
 * {@code [0, numFeatures[i])} by the configured {@link Hasher}, and the count
 * of each bucket is written back into the observation under the key
 * {@code "hvect:<column>:<bucket>"}.
 *
 * <p>The three arrays ({@code columns}, {@code tokenizer}, {@code numFeatures})
 * are parallel: index {@code i} of each one describes the same column.
 */
public class HashingVectorizer
implements Processor {
    // NOTE(review): presumably Processor is Serializable; kept at 0L so that
    // previously serialized instances stay readable.
    private static final long serialVersionUID = 0L;
    private final String[] columns;
    private final Hasher hasher;
    private final Tokenizer[] tokenizer;
    private final int[] numFeatures;

    /**
     * Creates a vectorizer over the given columns.
     *
     * @param columns     names of the text columns to vectorize
     * @param method      hashing scheme; its hasher is used for every column
     * @param tokenizer   tokenizers, one per entry of {@code columns}
     * @param numFeatures number of hash buckets, one per entry of {@code columns}
     */
    public HashingVectorizer(String[] columns, Method method, Tokenizer[] tokenizer, int[] numFeatures) {
        this.columns = columns;
        this.tokenizer = tokenizer;
        this.numFeatures = numFeatures;
        this.hasher = method.hasher;
    }

    /** Builds the feature key under which the given bucket of a column is stored. */
    private static String name(String col, int hash) {
        return "hvect:" + col + ":" + hash;
    }

    /**
     * Adds the hashed token-count features of every configured column to
     * {@code data}.
     *
     * <p>If a column is missing or does not hold a {@code String}, an error is
     * recorded on the observation and processing stops; features of columns
     * handled before the faulty one have already been written.
     *
     * @param data observation to read the text columns from and to write the
     *             features into
     */
    @Override
    public void process(RawObservation data) {
        for (int i = 0; i < this.columns.length; ++i) {
            String col = this.columns[i];
            Object s = data.get(col);
            // instanceof is false for null, so this also rejects missing values.
            if (!(s instanceof String)) {
                data.setError("Found non-text or missing data in " + col);
                return;
            }
            Map<String, Double> counts = this.tokenizer[i].tokenCounts((String)s);

            // Distinct tokens may land in the same bucket. Sum their counts
            // first: writing each token straight into the observation would let
            // the last colliding token overwrite the others.
            // Insertion-ordered so buckets are written in first-seen order.
            Map<Integer, Double> buckets = new LinkedHashMap<Integer, Double>();
            for (Map.Entry<String, Double> e : counts.entrySet()) {
                Integer bucket = this.hasher.hash(e.getKey(), this.numFeatures[i]);
                Double previous = buckets.get(bucket);
                if (previous == null) {
                    buckets.put(bucket, e.getValue());
                } else {
                    buckets.put(bucket, Double.valueOf(previous + e.getValue()));
                }
            }
            for (Map.Entry<Integer, Double> bucket : buckets.entrySet()) {
                data.put(HashingVectorizer.name(col, bucket.getKey()), bucket.getValue());
            }
        }
    }

    /** Returns {@code "HashingVectorizer(col1 ; col2 ; ...)"}. */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder().append("HashingVectorizer(");
        for (int i = 0; i < this.columns.length; ++i) {
            if (i > 0) {
                s.append(" ; ");
            }
            s.append(this.columns[i]);
        }
        return s.append(")").toString();
    }

    /** Available hashing schemes. */
    public static enum Method {
        /**
         * Buckets a token by {@link String#hashCode()} modulo the number of
         * features, shifted into the non-negative range.
         * NOTE(review): the name suggests this mirrors Spark MLlib's HashingTF
         * - confirm against the training side.
         */
        MLLIB(new Hasher(){

            @Override
            public int hash(String s, int numFeatures) {
                int raw = s.hashCode() % numFeatures;
                // Java's % keeps the sign of the dividend; fold negatives back
                // into [0, numFeatures).
                return raw < 0 ? raw + numFeatures : raw;
            }
        });

        final Hasher hasher;

        private Method(Hasher hasher) {
            this.hasher = hasher;
        }
    }

    /** Maps a token to a bucket index. */
    static interface Hasher {
        /**
         * @param s           token to hash
         * @param numFeatures number of buckets
         * @return the bucket index assigned to {@code s}
         */
        public int hash(String s, int numFeatures);
    }
}

