/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.analysis;

import com.dataiku.dip.classpathfix.DKUInts;
import com.dataiku.dip.futures.FutureProgress;
import com.dataiku.dip.futures.FutureProgressState;
import com.dataiku.dip.io.ColumnBlock;
import com.dataiku.dip.io.LinoMetaFile;
import com.dataiku.dip.io.LinoReader;
import com.dataiku.dip.utils.DKULogger;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class MultiCorrelator {
    /**
     * Fixed pool of 4 worker threads (named {@code MultiCorrelator-N}) on which the
     * correlation tasks are executed. {@link #run(File, String)} shuts this pool down
     * before returning, so a given instance can only serve a single call to {@code run}.
     */
    private ExecutorService es = Executors.newFixedThreadPool(4, new ThreadFactoryBuilder().setNameFormat("MultiCorrelator-%d").build());
    /** Logger shared by the driver and by all correlation tasks (category {@code dku.correlate}). */
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.correlate");

    /**
     * Ad-hoc entry point: runs the correlator against a hard-coded Lino cache
     * directory and discards the result.
     *
     * @param args ignored
     * @throws Exception whatever {@link #run(File, String)} throws
     */
    public static void main(String[] args) throws Exception {
        MultiCorrelator correlator = new MultiCorrelator();
        File linoDirectory = new File("/Users/clement/dda/caches/shaker-full-pivot-lino/draft_meuh/e0698c22e46dc73df96109b231aefa47/");
        String linoName = "cache";
        correlator.run(linoDirectory, linoName);
    }

    /**
     * Schedules and waits for one association computation per pair of columns
     * {@code (x, y)} with {@code y < x} of the given Lino dataset.
     *
     * <p>The task chosen for a pair depends on the memory type of column {@code x}:
     * <ul>
     *   <li>{@code DOUBLE}: every {@code DOUBLE} column {@code y} is batched into a single
     *       {@link MultiPearsonExecutor}; every other {@code y} gets an {@link UnrankedANOVA};</li>
     *   <li>{@code STRING_DICT}: a {@code STRING_DICT} column {@code y} gets a
     *       {@link Chi2Executor}; every other {@code y} gets an {@link UnrankedANOVA};</li>
     *   <li>any other type: column {@code x} is skipped.</li>
     * </ul>
     * Tasks write their result into {@code ret.lines.get(x).correlations.get(y)}.
     *
     * <p>The executor owned by this instance is shut down before this method returns
     * (normally or exceptionally), so the instance cannot be used for a second run.
     *
     * @param linoFile location of the Lino dataset, passed as-is to {@link LinoReader}
     * @param linoName name of the Lino dataset, passed as-is to {@link LinoReader}
     * @return the square result grid, one line and one cell per column
     * @throws Exception if the dataset cannot be opened, or if waiting on a task fails
     */
    public MultiCorrelation run(File linoFile, String linoName) throws Exception {
        try (LinoReader reader = new LinoReader(linoFile, linoName)) {
            MultiCorrelation ret = new MultiCorrelation();
            List<LinoMetaFile.ColumnHeader> headers = reader.getColumns();
            // Pre-allocate the full grid so that tasks only ever mutate existing cells.
            for (LinoMetaFile.ColumnHeader header : headers) {
                ret.variables.add(header.name);
                CorrelationLine cl = new CorrelationLine();
                for (int i = 0; i < headers.size(); ++i) {
                    cl.correlations.add(new Correlation());
                }
                ret.lines.add(cl);
            }

            int totalNbCorr = 0;
            // futureSizes.get(k) is the number of correlations produced by futures.get(k).
            List<Integer> futureSizes = new ArrayList<>();
            List<Future<?>> futures = new ArrayList<>();
            for (int x = 0; x < ret.variables.size(); ++x) {
                LinoMetaFile.ColumnHeader xHeader = headers.get(x);
                // NOTE(review): looks like a leftover debugging trace; kept to preserve stdout output.
                System.out.println((Object)xHeader.memType);
                if (xHeader.memType == ColumnBlock.MemoryType.DOUBLE) {
                    ArrayList<Integer> ys = new ArrayList<>();
                    for (int y = 0; y < x; ++y) {
                        if (headers.get(y).memType == ColumnBlock.MemoryType.DOUBLE) {
                            ys.add(y);
                            continue;
                        }
                        futures.add(this.es.submit(newAnovaTask(linoFile, linoName, ret, x, y)));
                        futureSizes.add(1);
                        ++totalNbCorr;
                    }
                    if (ys.size() <= 0) continue;
                    // All numeric partners of x share one task so that x is read once per block.
                    MultiPearsonExecutor me = new MultiPearsonExecutor();
                    me.linoFile = linoFile;
                    me.linoName = linoName;
                    me.out = ret;
                    me.x = x;
                    me.ys = DKUInts.toArray(ys);
                    futures.add(this.es.submit(me));
                    futureSizes.add(ys.size());
                    totalNbCorr += ys.size();
                    continue;
                }
                if (xHeader.memType != ColumnBlock.MemoryType.STRING_DICT) continue;
                for (int y = 0; y < x; ++y) {
                    if (headers.get(y).memType == ColumnBlock.MemoryType.STRING_DICT) {
                        Chi2Executor c2e = new Chi2Executor();
                        c2e.linoFile = linoFile;
                        c2e.linoName = linoName;
                        c2e.out = ret;
                        c2e.x = x;
                        c2e.y = y;
                        futures.add(this.es.submit(c2e));
                        futureSizes.add(1);
                        ++totalNbCorr;
                        continue;
                    }
                    futures.add(this.es.submit(newAnovaTask(linoFile, linoName, ret, x, y)));
                    futureSizes.add(1);
                    ++totalNbCorr;
                }
            }

            logger.info((Object)("Starting " + totalNbCorr + " correlations"));
            long before = System.currentTimeMillis();
            try (FutureProgress.AutocloseableFutureProgressState computingState = FutureProgress.pushAutoCloseableState((String)"Computing correlations", (double)totalNbCorr, (FutureProgressState.StateUnit)FutureProgressState.StateUnit.RECORDS)) {
                int done = 0;
                // futures and futureSizes are parallel lists: index k of one matches index k
                // of the other, starting at 0.
                for (int k = 0; k < futures.size(); ++k) {
                    futures.get(k).get();
                    done += futureSizes.get(k);
                    FutureProgress.updateState((double)done);
                }
            }
            this.es.shutdown();
            this.es.awaitTermination(1000L, TimeUnit.HOURS);
            logger.info((Object)("Done " + totalNbCorr + " correlations in " + (System.currentTimeMillis() - before) + "ms"));
            return ret;
        }
        finally {
            // Also reached on failure: cancels whatever is still queued or running.
            this.es.shutdownNow();
        }
    }

    /** Builds an ANOVA task for the pair (x, y) writing into {@code out}. */
    private UnrankedANOVA newAnovaTask(File linoFile, String linoName, MultiCorrelation out, int x, int y) {
        UnrankedANOVA ure = new UnrankedANOVA();
        ure.linoFile = linoFile;
        ure.linoName = linoName;
        ure.out = out;
        ure.x = x;
        ure.y = y;
        return ure;
    }

    /**
     * Square grid of pairwise results: {@code lines.get(x).correlations.get(y)} holds
     * the result for the columns named {@code variables.get(x)} and {@code variables.get(y)}.
     */
    public static class MultiCorrelation {
        /** Column names; the index in this list is the index used for lines and cells. */
        public List<String> variables = new ArrayList<>();
        /** One line per column, in the same order as {@link #variables}. */
        public List<CorrelationLine> lines = new ArrayList<>();
    }

    /** One row of the result grid: the cells of a given column against every column. */
    public static class CorrelationLine {
        /** Cells of this row, indexed by the position of the other column. */
        public List<Correlation> correlations = new ArrayList<>();
    }

    /**
     * One cell of the result grid. Only the fields relevant to {@link #type} are
     * filled by the task that computed the cell; the others stay {@code null}.
     */
    public static class Correlation {
        /** Strength bucket assigned by the computing task; -1 until a task sets it. */
        public int grade = -1;
        /** Which kind of measure was computed; {@code null} until a task sets it. */
        public CorrelationType type;
        /** Pearson correlation coefficient (set by MultiPearsonExecutor). */
        public Double pearson;
        /** Chi-squared statistic (set by Chi2Executor). */
        public Double chi2;
        /** Cramér's V (set by Chi2Executor). */
        public Double cramerV;
        /** ANOVA F statistic (set by UnrankedANOVA when F is finite). */
        public Double anovaF;
        /** Cohen's f effect size (set by UnrankedANOVA when F is finite). */
        public Double cohenF;
    }

    /**
     * Task computing a one-way ANOVA between a dictionary-encoded (categorical) column
     * and a numeric column, and storing the F statistic, Cohen's f and a 0..3 grade in
     * {@code out.lines.get(x).correlations.get(y)}.
     *
     * <p>Whichever of {@code x} / {@code y} has memory type {@code STRING_DICT} is used
     * as the grouping variable (column {@code x} is tested first); the other one is read
     * as doubles. Records whose numeric value is NaN are ignored in the sums. Any
     * exception is logged and swallowed, leaving the cell partially or not filled.
     */
    public class UnrankedANOVA
    implements Runnable {
        File linoFile;
        String linoName;
        MultiCorrelation out;
        int x;
        int y;

        @Override
        public void run() {
            try (LinoReader reader = new LinoReader(this.linoFile, this.linoName)) {
                LinoMetaFile.ColumnHeader xHeader = reader.getColHeader(this.out.variables.get(this.x));
                LinoMetaFile.ColumnHeader yHeader = reader.getColHeader(this.out.variables.get(this.y));
                LinoMetaFile.ColumnHeader catVar;
                LinoMetaFile.ColumnHeader numVar;
                if (xHeader.memType == ColumnBlock.MemoryType.STRING_DICT) {
                    catVar = xHeader;
                    numVar = yHeader;
                } else {
                    catVar = yHeader;
                    numVar = xHeader;
                }

                int I = catVar.stringDict.size();
                double average = 0.0;
                int validSamples = 0;
                double[] sampleAverages = new double[I];
                int[] sampleCounts = new int[I];
                // NOTE(review): N counts every record, including those skipped below because
                // the numeric value is NaN - confirm this is the intended sample size.
                int N = 0;

                // Pass 1: per-group and global sums over the non-NaN records.
                for (int block = 0; block < reader.nblocks(); ++block) {
                    ColumnBlock catBlock = reader.readColumnBlock(catVar.name, block);
                    ColumnBlock numBlock = reader.readColumnBlock(numVar.name, block);
                    N += catBlock.nbRecords();
                    for (int r = 0; r < catBlock.nbRecords(); ++r) {
                        double value = numBlock.doubles[r];
                        if (Double.isNaN(value)) continue;
                        int group = catBlock.ints[r];
                        sampleAverages[group] += value;
                        sampleCounts[group] += 1;
                        average += value;
                        ++validSamples;
                    }
                }
                // Turn the sums into means; empty groups keep a mean of 0 and a count of 0.
                for (int g = 0; g < I; ++g) {
                    if (sampleCounts[g] > 0) {
                        sampleAverages[g] /= (double)sampleCounts[g];
                    }
                }
                if (validSamples > 0) {
                    average /= (double)validSamples;
                }

                // Between-group sum of squares, and the variance of the group means
                // weighted by group proportion (numerator of Cohen's f squared).
                double cohenFNumerator = 0.0;
                double ssb = 0.0;
                for (int g = 0; g < I; ++g) {
                    double sqDev = Math.pow(sampleAverages[g] - average, 2.0);
                    ssb += (double)sampleCounts[g] * sqDev;
                    double pi = (double)sampleCounts[g] / (double)N;
                    cohenFNumerator += pi * sqDev;
                }

                // Pass 2: within-group sum of squares.
                double ssw = 0.0;
                for (int block = 0; block < reader.nblocks(); ++block) {
                    ColumnBlock catBlock = reader.readColumnBlock(catVar.name, block);
                    ColumnBlock numBlock = reader.readColumnBlock(numVar.name, block);
                    for (int r = 0; r < catBlock.nbRecords(); ++r) {
                        double value = numBlock.doubles[r];
                        if (Double.isNaN(value)) continue;
                        ssw += Math.pow(value - sampleAverages[catBlock.ints[r]], 2.0);
                    }
                }

                // NOTE(review): the degrees of freedom treat (I - 1) as the number of groups,
                // presumably because one dictionary entry is reserved - confirm.
                int dofFactor = I - 1 - 1;
                int dofResidual = N - (I - 1);
                double s2Factor = ssb / (double)dofFactor;
                double s2Residual = ssw / (double)dofResidual;
                double F = s2Factor / s2Residual;
                double eta2 = ssb / (ssb + ssw);
                double cohenf = Math.sqrt(eta2 / (1.0 - eta2));
                // Cohen's f is the standard deviation of the group means divided by the
                // within-group standard deviation, so the denominator must be the within-group
                // variance (s2Residual), not the raw within-group sum of squares.
                double cohenf2 = Math.sqrt(cohenFNumerator / s2Residual);
                logger.infoV("ANOVA I=%d ssb=%.2f ssw=%.2f dofF=%d dofR=%d s2F=%.2f s2R=%.2f F=%f cN=%f eta2=" + eta2 + " cohen=%f compute2=%f", new Object[]{I, ssb, ssw, dofFactor, dofResidual, s2Factor, s2Residual, F, cohenFNumerator, cohenf, cohenf2});

                Correlation cor = this.out.lines.get(this.x).correlations.get(this.y);
                cor.type = CorrelationType.NUM_CAT_ANOVA;
                // A non-finite F means the statistic is undefined: leave values and grade unset.
                if (!Double.isInfinite(F) && !Double.isNaN(F)) {
                    cor.anovaF = F;
                    cor.cohenF = cohenf2;
                    cor.grade = cor.cohenF > 0.4 ? 3 : (cor.cohenF > 0.25 ? 2 : (cor.cohenF > 0.1 ? 1 : 0));
                }
            }
            catch (Exception e) {
                logger.error((Object)"Failed", (Throwable)e);
            }
        }
    }

    public class MultiPearsonExecutor
    implements Runnable {
        File linoFile;
        String linoName;
        int x;
        int[] ys;
        MultiCorrelation out;

        @Override
        public void run() {
            try (LinoReader reader = new LinoReader(this.linoFile, this.linoName);){
                PearsonTmpData[] pearsonTMP = new PearsonTmpData[this.ys.length];
                for (int i = 0; i < this.ys.length; ++i) {
                    pearsonTMP[i] = new PearsonTmpData();
                }
                for (int block = 0; block < reader.nblocks(); ++block) {
                    ColumnBlock xblock = reader.readColumnBlock(this.out.variables.get(this.x), block);
                    for (int yIdx = 0; yIdx < this.ys.length; ++yIdx) {
                        int y = this.ys[yIdx];
                        PearsonTmpData tmp = pearsonTMP[yIdx];
                        ColumnBlock yblock = reader.readColumnBlock(this.out.variables.get(y), block);
                        for (int i = 0; i < xblock.nbRecords(); ++i) {
                            double d1 = xblock.doubles[i];
                            double d2 = yblock.doubles[i];
                            if (Double.isNaN(d1) || Double.isNaN(d2)) continue;
                            tmp.sum1 += d1;
                            tmp.sum2 += d2;
                            tmp.sum1SQ += Math.pow(d1, 2.0);
                            tmp.sum2SQ += Math.pow(d2, 2.0);
                            tmp.psum += d1 * d2;
                            ++tmp.count;
                        }
                    }
                }
                for (int yIdx = 0; yIdx < this.ys.length; ++yIdx) {
                    PearsonTmpData tmp = pearsonTMP[yIdx];
                    int y = this.ys[yIdx];
                    double n = tmp.psum - tmp.sum1 * tmp.sum2 / (double)tmp.count;
                    double d = Math.sqrt((tmp.sum1SQ - tmp.sum1 * tmp.sum1 / (double)tmp.count) * (tmp.sum2SQ - tmp.sum2 * tmp.sum2 / (double)tmp.count));
                    double rho = d == 0.0 ? 0.0 : n / d;
                    if (Double.isNaN(rho)) {
                        logger.error((Object)("NaN  : " + this.out.variables.get(this.x) + " and " + this.out.variables.get(y)));
                        rho = 0.0;
                    }
                    Correlation cor = this.out.lines.get((int)this.x).correlations.get(y);
                    cor.type = CorrelationType.NUM_NUM_PEARSON;
                    cor.pearson = rho;
                    cor.grade = (int)Math.floor(Math.abs(rho) / 0.25);
                }
            }
            catch (IOException e) {
                logger.error((Object)"failed", (Throwable)e);
            }
            logger.info((Object)"thread done");
        }
    }

    /**
     * Task computing a chi-squared statistic and Cramér's V between two
     * dictionary-encoded columns {@code x} and {@code y}, and storing them with a 0..3
     * grade in {@code out.lines.get(x).correlations.get(y)}.
     *
     * <p>The pair is skipped (cell left untouched) when the product of the two
     * dictionary sizes exceeds 100. Any exception is logged and swallowed.
     */
    public class Chi2Executor
    implements Runnable {
        File linoFile;
        String linoName;
        MultiCorrelation out;
        int x;
        int y;

        @Override
        public void run() {
            try (LinoReader reader = new LinoReader(this.linoFile, this.linoName)) {
                String xName = this.out.variables.get(this.x);
                String yName = this.out.variables.get(this.y);
                int I = reader.getColHeader(xName).stringDict.size();
                int J = reader.getColHeader(yName).stringDict.size();
                // Multiply as long so that two large dictionaries cannot overflow past the guard.
                if ((long)I * (long)J > 100L) {
                    logger.info((Object)("Abort correlation " + reader.getColHeader(xName).name + reader.getColHeader(yName).name + ": too many DOF"));
                    return;
                }

                // Observed counts: row marginals, column marginals and the I x J contingency
                // table flattened as [I * j + i].
                int[] Oiplus = new int[I];
                int[] Oplusj = new int[J];
                int[] Oij = new int[I * J];
                int N = 0;
                for (int block = 0; block < reader.nblocks(); ++block) {
                    ColumnBlock xblock = reader.readColumnBlock(xName, block);
                    ColumnBlock yblock = reader.readColumnBlock(yName, block);
                    N += xblock.nbRecords();
                    for (int r = 0; r < xblock.nbRecords(); ++r) {
                        int i = xblock.ints[r];
                        int j = yblock.ints[r];
                        Oiplus[i] += 1;
                        Oplusj[j] += 1;
                        Oij[I * j + i] += 1;
                    }
                }

                double T = 0.0;
                // NOTE(review): index 0 of both dictionaries is left out of the statistic,
                // presumably because it is a reserved entry - confirm.
                for (int i = 1; i < I; ++i) {
                    for (int j = 1; j < J; ++j) {
                        // Expected count under independence. Computed in floating point:
                        // integer arithmetic would truncate it, could overflow on the
                        // product, and would throw on an empty dataset (N == 0).
                        double Eij = (double)Oiplus[i] * (double)Oplusj[j] / (double)N;
                        if (!(Eij > 0.0)) continue;
                        T += Math.pow((double)Oij[I * j + i] - Eij, 2.0) / Eij;
                    }
                }

                Correlation cor = this.out.lines.get(this.x).correlations.get(this.y);
                cor.type = CorrelationType.CAT_CAT_CHI2;
                cor.chi2 = T;
                cor.cramerV = Math.sqrt(T / (double)(N * (Math.min(I, J) - 1)));
                if (Double.isNaN(cor.cramerV)) {
                    cor.cramerV = 0.0;
                }

                // Grade thresholds depend on the smaller number of categories. Four or more
                // share one set of thresholds; previously exactly four fell through ungraded.
                int mostGroups = Math.min(I, J);
                if (mostGroups >= 4) {
                    cor.grade = cor.cramerV > 0.3 ? 3 : (cor.cramerV > 0.18 ? 2 : (cor.cramerV > 0.05 ? 1 : 0));
                } else if (mostGroups == 3) {
                    cor.grade = cor.cramerV > 0.35 ? 3 : (cor.cramerV > 0.2 ? 2 : (cor.cramerV > 0.07 ? 1 : 0));
                } else if (mostGroups == 2) {
                    cor.grade = cor.cramerV > 0.5 ? 3 : (cor.cramerV > 0.3 ? 2 : (cor.cramerV > 0.1 ? 1 : 0));
                }
            }
            catch (Exception e) {
                logger.error((Object)"Failed", (Throwable)e);
            }
        }
    }

    /**
     * Running sums needed to compute a Pearson correlation for one pair of columns
     * in a single pass. All fields start at zero.
     */
    static class PearsonTmpData {
        /** Sum of the first variable's values. */
        double sum1;
        /** Sum of the second variable's values. */
        double sum2;
        /** Sum of the squares of the first variable's values. */
        double sum1SQ;
        /** Sum of the squares of the second variable's values. */
        double sum2SQ;
        /** Sum of the products of paired values. */
        double psum;
        /** Number of pairs accumulated. */
        long count;
    }

    /** Kind of association measure stored in a {@link Correlation} cell. */
    public enum CorrelationType {
        /** Numeric vs numeric: Pearson correlation coefficient. */
        NUM_NUM_PEARSON,
        /** Numeric vs categorical: one-way ANOVA. */
        NUM_CAT_ANOVA,
        /** Categorical vs categorical: chi-squared test. */
        CAT_CAT_CHI2
    }
}

