/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.partitioning;

import com.dataiku.dip.CodedRuntimeException;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.datasets.DatasetCodes;
import com.dataiku.dip.datasets.DatasetInspector;
import com.dataiku.dip.datasets.fs.AbstractFSDatasetHandler;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.filter.InputFilter;
import com.dataiku.dip.logging.LimitedLogContext;
import com.dataiku.dip.logging.LimitedLogFactory;
import com.dataiku.dip.partitioning.Dimension;
import com.dataiku.dip.partitioning.DimensionValue;
import com.dataiku.dip.partitioning.ExactValueDimension;
import com.dataiku.dip.partitioning.ExactValueDimensionValue;
import com.dataiku.dip.partitioning.FileBucket;
import com.dataiku.dip.partitioning.FilePartition;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.partitioning.PartitioningScheme;
import com.dataiku.dip.partitioning.TimeDimension;
import com.dataiku.dip.partitioning.TimeDimensionValue;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.PathUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Level;

public class FilePartitioner {
    private final PartitioningScheme scheme;
    private final String filePathPattern;
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.partitioning");

    /**
     * Creates a partitioner for {@code scheme}, using the file path pattern declared by the scheme itself.
     *
     * @param scheme partitioning scheme; it is dereferenced immediately, so it must not be {@code null}
     */
    public FilePartitioner(PartitioningScheme scheme) {
        this(scheme, scheme.getFilePathPattern());
    }

    /**
     * Creates a partitioner for {@code scheme} with an explicit file path pattern.
     *
     * @param scheme  partitioning scheme; stored as-is
     * @param pattern file path pattern; normalized through {@code PathUtils.makeLeadingNoTrailing},
     *                or replaced by the empty string when {@code null}
     */
    protected FilePartitioner(PartitioningScheme scheme, String pattern) {
        this.scheme = scheme;
        if (pattern == null) {
            this.filePathPattern = "";
        } else {
            this.filePathPattern = PathUtils.makeLeadingNoTrailing((String)pattern);
        }
    }

    /**
     * Looks up a single partition of {@code dataset} by identifier.
     *
     * <p>All partitions are listed through {@link #listPartitions(AuthCtx, Dataset)} and scanned linearly.
     *
     * @param authCtx    authentication context forwarded to {@code listPartitions}
     * @param dataset    dataset whose partitions are searched
     * @param identifier partition id to look for
     * @return the first partition whose {@code id()} equals {@code identifier}, or {@code null} if none does
     */
    public static FilePartition getPartition(AuthCtx authCtx, Dataset dataset, String identifier) throws IOException, InterruptedException, DKUSecurityException, CodedException {
        for (FilePartition candidate : FilePartitioner.listPartitions(authCtx, dataset)) {
            if (candidate.id().equals(identifier)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Computes one folder prefix per partition selected by {@code filter}.
     *
     * @param filter input filter; may be {@code null}
     * @return the folder-relative path of every partition in the filter's partitions clause, or
     *         {@code null} when there is no filter, no partitions clause, no scheme, or the scheme
     *         cannot be represented as a folder layout
     */
    public List<String> getPrefixesForFilter(InputFilter filter) {
        boolean hasClause = filter != null && filter.getPartitionsClause() != null;
        if (!hasClause) {
            return null;
        }
        boolean folderLayout = this.scheme != null && FilePartitioner.isSchemeRepresentableAsFolder(this.scheme);
        if (!folderLayout) {
            return null;
        }
        List<String> prefixes = new ArrayList<String>();
        for (Partition selected : filter.getPartitionsClause()) {
            prefixes.add(FilePartitioner.computePartitionRelPathAsFolder(selected, this.scheme));
        }
        return prefixes;
    }

    /**
     * Enumerates the files of a filesystem-based dataset and groups them into partitions.
     *
     * <p>For a non-partitioned dataset a single partition holding every path in one bucket is returned.
     * Otherwise the paths are dispatched by {@link #getPartitionsFromFiles(List)} according to the
     * dataset's partitioning scheme.
     *
     * <p>The dataset handler is managed by try-with-resources, so it is closed exactly once on every
     * exit path and a failure in {@code close()} is attached as a suppressed exception instead of
     * masking the primary one.
     *
     * @param authCtx authentication context used to build the dataset handler
     * @param dataset dataset to inspect; must satisfy {@code DatasetInspector.isFS}
     * @return the partitions found; an empty, mutable list when the filesystem enumeration reports
     *         {@link FileNotFoundException}
     * @throws IOException if enumerating the filesystem or closing the handler fails
     */
    public static List<FilePartition> listPartitions(AuthCtx authCtx, Dataset dataset) throws IOException, InterruptedException, DKUSecurityException, CodedException {
        if (!DatasetInspector.isFS(dataset)) {
            throw ErrorContext.iaef((String)"Can't enumerate partitions on dataset %s, invalid type %s", (Object)dataset.getName(), (Object[])new Object[]{dataset.getType()});
        }
        try (AbstractFSDatasetHandler descr = (AbstractFSDatasetHandler)DatasetHandlerFactory.build(authCtx, dataset)) {
            List<FSPath> paths;
            try {
                paths = descr.enumerateFilesystem();
            }
            catch (FileNotFoundException e) {
                // A missing root is treated as "no data yet" rather than as an error.
                logger.info((Object)"Trying to list partitions in an empty dataset", (Throwable)e);
                return new ArrayList<FilePartition>();
            }
            logger.info((Object)("Dataset " + dataset.getName() + " -> " + paths.size() + " paths"));

            List<FilePartition> out;
            if (!dataset.getPartitioningSchema().isPartitioned()) {
                // Not partitioned: everything goes into one partition with one bucket.
                FilePartition fp = new FilePartition(dataset.getPartitioningSchema());
                FileBucket fb = new FileBucket();
                fb.paths.addAll(paths);
                fp.buckets.add(fb);
                out = new ArrayList<FilePartition>();
                out.add(fp);
            } else {
                FilePartitioner partitioner = new FilePartitioner(dataset.getPartitioningSchema());
                out = partitioner.getPartitionsFromFiles(paths);
            }
            logger.info((Object)("Dataset " + dataset.getName() + " -> " + out.size() + " partition"));
            return out;
        }
    }

    /**
     * Tells whether every partition of {@code scheme} maps to a single folder.
     *
     * <p>The file path pattern qualifies when, after replacing dimension placeholders with fixed
     * tokens, it ends with {@code /.*} and no path segment before that suffix contains a {@code *}.
     *
     * @param scheme partitioning scheme to inspect
     * @return {@code true} if the pattern has the folder shape described above; {@code false}
     *         otherwise, including when the scheme has no file path pattern
     */
    public static boolean isSchemeRepresentableAsFolder(PartitioningScheme scheme) {
        String rawPattern = scheme.getFilePathPattern();
        if (rawPattern == null) {
            return false;
        }
        // Neutralize placeholders so that only literal wildcards remain visible.
        String resolved = PathUtils.makeLeadingNoTrailing((String)rawPattern)
                .replaceAll("%\\(([^%]+?)\\)\\{[^\\}]*\\}", "EXACT")
                .replaceAll("%\\{[^\\}]*\\}", "EXACT")
                .replaceAll("%[_0]?[MDHY]", "TIME");
        if (!resolved.endsWith("/.*")) {
            return false;
        }
        String folderPart = resolved.substring(0, resolved.length() - 3);
        for (String segment : folderPart.split("/")) {
            if (segment.contains("*")) {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves the scheme's file path pattern for partition {@code p} and returns it as a folder path.
     *
     * @param p      partition whose dimension values are substituted into the pattern
     * @param scheme partitioning scheme providing the pattern
     * @return the resolved pattern with every {@code /.*} replaced by {@code /} when it ends with
     *         {@code /.*}; otherwise the resolved pattern unchanged
     * @throws CodedRuntimeException if the resolved pattern does not end with {@code /.*} but still
     *         contains a {@code *}
     */
    public static String computePartitionRelPathAsFolder(Partition p, PartitioningScheme scheme) {
        String resolved = new FilePartitioner(scheme).resolvePartitionIdentifierInPattern(p);
        boolean endsWithFolderWildcard = resolved.endsWith("/.*");
        if (!endsWithFolderWildcard && resolved.contains("*")) {
            throw new CodedRuntimeException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_PARTITIONING_CONFIG, "Can't resolve the path of this partition to a valid folder:  " + resolved);
        }
        return endsWithFolderWildcard
                ? StringUtils.replace((String)resolved, (String)"/.*", (String)"/")
                : resolved;
    }

    /**
     * Resolves the scheme's file path pattern for partition {@code p} and converts it to a shell glob.
     *
     * <p>{@code .*} becomes {@code *} and an escaped dot becomes a plain dot. Any backslash left
     * after that is treated as a regex construct that has no glob equivalent.
     *
     * @param p      partition whose dimension values are substituted into the pattern
     * @param scheme partitioning scheme providing the pattern
     * @return the glob expression
     * @throws CodedRuntimeException if the converted pattern still contains a backslash
     */
    public static String computePartitionRelPathAsShellGlob(Partition p, PartitioningScheme scheme) {
        String resolved = new FilePartitioner(scheme).resolvePartitionIdentifierInPattern(p);
        String withStars = resolved.replace(".*", "*");
        String glob = withStars.replace("\\.", ".");
        if (!glob.contains("\\")) {
            return glob;
        }
        throw new CodedRuntimeException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_PARTITIONING_CONFIG, "Don't know how to transform partition pattern " + scheme.getFilePathPattern() + " into a valid glob expression. Consider using a sync recipe to simplify your files organization");
    }

    /**
     * Substitutes the dimension values of {@code p} into this partitioner's file path pattern.
     *
     * @param p partition to resolve
     * @return {@code "/"} when the scheme or partition is considered not partitioned (see
     *         {@code isNotPartitioned}); otherwise the pattern with its placeholders replaced
     */
    public String resolvePartitionIdentifierInPattern(Partition p) {
        if (FilePartitioner.isNotPartitioned(p, this.scheme)) {
            return "/";
        }
        if (this.filePathPattern == null) {
            throw ErrorContext.iae((String)"File path pathPattern is not defined");
        }
        String normalizedPattern = PathUtils.makeLeadingNoTrailing((String)this.filePathPattern);
        return FilePartitioner.resolvePartitionIdentifierInPath(p, normalizedPattern, this.scheme, false);
    }

    /**
     * Tells whether placeholder substitution should be skipped for this scheme/partition pair.
     *
     * @return {@code true} when there is no scheme, the scheme is not partitioned, or the partition
     *         reports {@code isNP()} or {@code isAll()}
     */
    private static boolean isNotPartitioned(Partition p, PartitioningScheme scheme) {
        if (scheme == null || !scheme.isPartitioned()) {
            return true;
        }
        return p.isNP() || p.isAll();
    }

    /**
     * Substitutes the dimension values of {@code p} into {@code url}, URL-encoding exact values.
     *
     * @param p      partition to resolve
     * @param scheme partitioning scheme describing the dimensions
     * @param url    URL containing dimension placeholders
     * @return {@code url} unchanged when the scheme or partition is considered not partitioned;
     *         otherwise the URL with its placeholders replaced
     */
    public static String resolvePartitionIdentifierInURL(Partition p, PartitioningScheme scheme, String url) {
        return FilePartitioner.isNotPartitioned(p, scheme)
                ? url
                : FilePartitioner.resolvePartitionIdentifierInPath(p, url, scheme, true);
    }

    /**
     * Replaces every dimension placeholder of {@code url} by the matching value of partition {@code p}.
     *
     * <p>Time dimensions delegate to {@code TimeDimensionValue.resolvePattern}. Exact-value dimensions
     * replace both {@code %{name}} and {@code %(regex){name}} by the value id, URL-encoded in UTF-8
     * when {@code isUrl} is set.
     *
     * <p>The value id is inserted literally: it goes through {@link Matcher#quoteReplacement(String)}
     * so that a {@code $} or {@code \} inside a partition id is not interpreted by
     * {@link String#replaceAll(String, String)} as a group reference or escape.
     *
     * @param p      partition providing the dimension values
     * @param url    path or URL containing the placeholders
     * @param scheme partitioning scheme listing the dimensions
     * @param isUrl  whether exact values must be URL-encoded before insertion
     * @return the resolved path or URL
     * @throws NotImplementedException if a dimension is neither a time nor an exact-value dimension
     */
    private static String resolvePartitionIdentifierInPath(Partition p, String url, PartitioningScheme scheme, boolean isUrl) {
        String resolved = url;
        for (String dname : scheme.getDimensionNames()) {
            Dimension d = scheme.getDimension(dname);
            if (d instanceof TimeDimension) {
                TimeDimensionValue tdv = (TimeDimensionValue)p.getDimensionValues().get(dname);
                resolved = tdv.resolvePattern(resolved);
            } else if (d instanceof ExactValueDimension) {
                ExactValueDimensionValue edv = (ExactValueDimensionValue)p.getDimensionValues().get(dname);
                String pid = edv.id();
                if (isUrl) {
                    try {
                        pid = URLEncoder.encode(edv.id(), "UTF-8");
                    }
                    catch (UnsupportedEncodingException e) {
                        // UTF-8 is a mandatory charset; keep the raw id as a best-effort fallback.
                        pid = edv.id();
                    }
                }
                String placeholderRegex = "%(\\(([^%]+?)\\))?\\{" + Pattern.quote(dname) + "\\}";
                resolved = resolved.replaceAll(placeholderRegex, Matcher.quoteReplacement(pid));
            } else {
                throw new NotImplementedException();
            }
        }
        return resolved;
    }

    /**
     * Replaces dimension placeholders in {@code url} by human-readable words, for building a
     * suggested name.
     *
     * <p>When the scheme has a time dimension, {@code %Y}, {@code %M}, {@code %D} and {@code %H}
     * become {@code year}, {@code month}, {@code day} and {@code hour}. For each exact-value
     * dimension, {@code %{name}} and {@code %(regex){name}} become the dimension name.
     *
     * <p>The dimension name is inserted literally: it goes through
     * {@link Matcher#quoteReplacement(String)} so that a {@code $} or {@code \} in the name is not
     * interpreted by {@link String#replaceAll(String, String)} as a group reference or escape.
     *
     * @param url    path or URL containing the placeholders
     * @param scheme partitioning scheme listing the dimensions
     * @return the path with placeholders replaced
     * @throws NotImplementedException if a dimension is neither a time nor an exact-value dimension
     */
    public static String substitutePartitionIdentifierInPathForSuggestedName(String url, PartitioningScheme scheme) {
        String substituted = url;
        for (String dname : scheme.getDimensionNames()) {
            Dimension d = scheme.getDimension(dname);
            if (d instanceof TimeDimension) {
                // Literal tokens: a plain replace is equivalent to the regex form and cheaper.
                substituted = substituted.replace("%Y", "year")
                        .replace("%M", "month")
                        .replace("%D", "day")
                        .replace("%H", "hour");
            } else if (d instanceof ExactValueDimension) {
                String placeholderRegex = "%(\\(([^%]+?)\\))?\\{" + Pattern.quote(dname) + "\\}";
                substituted = substituted.replaceAll(placeholderRegex, Matcher.quoteReplacement(dname));
            } else {
                throw new NotImplementedException();
            }
        }
        return substituted;
    }

    /**
     * Returns the folder path of partition {@code p}, prefixed with {@code /}.
     *
     * @param p      partition; may be {@code null}
     * @param scheme partitioning scheme; may be {@code null}
     * @return {@code "/"} followed by the partition's folder path, or the empty string when either
     *         argument is {@code null} or the scheme is not partitioned
     */
    public static String getRelPath(Partition p, PartitioningScheme scheme) {
        if (p == null || scheme == null || !scheme.isPartitioned()) {
            return "";
        }
        return "/" + FilePartitioner.computePartitionRelPathAsFolder(p, scheme);
    }

    /**
     * Builds the relative path of one output split file of partition {@code p}.
     *
     * @param p         partition the split belongs to
     * @param scheme    partitioning scheme
     * @param splitId   split index, used in the file name
     * @param extension file extension, without the leading dot
     * @return {@code <relPath>/out-<splitId>.<extension>}, where {@code relPath} comes from
     *         {@link #getRelPath(Partition, PartitioningScheme)}
     */
    public static String getSplitFileRelPath(Partition p, PartitioningScheme scheme, int splitId, String extension) {
        StringBuilder relPath = new StringBuilder(FilePartitioner.getRelPath(p, scheme));
        relPath.append("/out-").append(splitId).append(".").append(extension);
        return relPath.toString();
    }

    /**
     * Checks that the path pattern contains the placeholder of the given time period.
     *
     * @param timeDimensionPeriod period the dimension is mapped to
     * @param filePathPattern     path pattern to search
     * @throws CodedRuntimeException if the period's placeholder regex finds no match in the pattern
     */
    private void assertPeriodPresent(TimeDimension.Period timeDimensionPeriod, String filePathPattern) {
        Pattern periodRegex = Pattern.compile(timeDimensionPeriod.patternPattern());
        boolean present = periodRegex.matcher(filePathPattern).find();
        if (present) {
            return;
        }
        throw new CodedRuntimeException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_PARTITIONING_CONFIG, "To partition on this period, you need to have " + timeDimensionPeriod.pattern() + " in your path pattern.");
    }

    /**
     * Builds the regex that extracts one time period from a file path.
     *
     * <p>Each time placeholder found in {@code path} is replaced by a digit pattern. The placeholder
     * of the {@code select} period is additionally wrapped in the named group {@code dim}.
     * Exact-value placeholders are then replaced by their own patterns and the result is compiled.
     *
     * @param select     period whose value must be captured
     * @param path       file path pattern containing placeholders
     * @param strictMode when {@code true}, unpadded month/day/hour placeholders require exactly two
     *                   digits instead of one or two
     * @return the compiled pattern
     */
    private Pattern setMatcher(TimeDimension.Period select, String path, boolean strictMode) {
        String regex = path;
        for (TimeDimension.Period period : TimeDimension.Period.values()) {
            Matcher finder = Pattern.compile(period.patternPattern()).matcher(regex);
            if (finder.find() && finder.group() != null) {
                String token = finder.group();
                String digits;
                if (period == TimeDimension.Period.YEAR) {
                    digits = "[0-9]{4,4}";
                } else if (token.startsWith("%_")) {
                    digits = "[0-9]{1,2}";
                } else if (token.startsWith("%0")) {
                    digits = "[0-9]{2,2}";
                } else if (strictMode) {
                    digits = "[0-9]{2,2}";
                } else {
                    digits = "[0-9]{1,2}";
                }
                String replacement = period == select ? "(?<dim>" + digits + ")" : digits;
                regex = regex.replace(token, replacement);
            }
        }
        return this.compilePattern(this.replaceExactValueDimensionsByPattern(regex));
    }

    /**
     * Replaces exact-value placeholders by plain regex fragments.
     *
     * <p>{@code %(regex){name}} becomes {@code regex}; {@code %{name}} becomes {@code [^/]*}.
     *
     * @param originalPath path pattern containing exact-value placeholders
     * @return the pattern with those placeholders expanded
     */
    private String replaceExactValueDimensionsByPattern(String originalPath) {
        String customRegexExpanded = originalPath.replaceAll("%\\(([^%]+?)\\)\\{[^\\}]*\\}", "$1");
        String allExpanded = customRegexExpanded.replaceAll("%\\{[^\\}]*\\}", "[^/]*");
        return allExpanded;
    }

    /**
     * Prepares the four extraction patterns (year, month, day, hour) for a time dimension.
     *
     * @param tpd     time dimension; its mapped period must appear in {@code pattern}
     * @param pattern file path pattern
     * @return a handler holding one compiled pattern per period
     * @throws CodedRuntimeException if the mapped period's placeholder is missing from the pattern
     */
    private TimePartitionHandler computeTimePartitionHandler(TimeDimension tpd, String pattern) {
        this.assertPeriodPresent(tpd.mappedPeriod, pattern);
        // Adjacent placeholders with no separator force fixed-width (two-digit) matching.
        boolean strictMode = !this.isSeparatorsBtwTimePeriods(pattern);
        TimePartitionHandler handler = new TimePartitionHandler();
        handler.year = this.setMatcher(TimeDimension.Period.YEAR, pattern, strictMode);
        handler.month = this.setMatcher(TimeDimension.Period.MONTH, pattern, strictMode);
        handler.day = this.setMatcher(TimeDimension.Period.DAY, pattern, strictMode);
        handler.hour = this.setMatcher(TimeDimension.Period.HOUR, pattern, strictMode);
        return handler;
    }

    /**
     * Tells whether every pair of consecutive time placeholders in {@code pattern} is separated by
     * at least one character.
     *
     * <p>Only the first occurrence of each period's placeholder is considered.
     *
     * @param pattern file path pattern
     * @return {@code false} if some placeholder ends exactly where the next one starts;
     *         {@code true} otherwise
     */
    private boolean isSeparatorsBtwTimePeriods(String pattern) {
        Map<Integer, Integer> lengthByStart = new HashMap<Integer, Integer>();
        for (TimeDimension.Period period : TimeDimension.Period.values()) {
            String token = period.pattern();
            int start = pattern.indexOf(token);
            if (start >= 0) {
                lengthByStart.put(start, token.length());
            }
        }
        List<Integer> starts = new ArrayList<Integer>(lengthByStart.keySet());
        Collections.sort(starts);
        for (int i = 1; i < starts.size(); i++) {
            int previousStart = starts.get(i - 1);
            int currentStart = starts.get(i);
            if (previousStart + lengthByStart.get(previousStart) == currentStart) {
                return false;
            }
        }
        return true;
    }

    /**
     * Pre-processes the file path pattern before it is turned into regexes by
     * {@code getPartitionsFromFiles}. This implementation returns the pattern unchanged.
     *
     * NOTE(review): being protected, this looks like an extension point for subclasses - confirm.
     *
     * @param p file path pattern
     * @return {@code p} itself
     */
    protected String escapePattern(String p) {
        return p;
    }

    /**
     * Groups {@code inputPaths} into partitions by matching each path against the file path pattern.
     *
     * <p>The method starts from a single pseudo-partition (key {@code "NP"}) holding every path,
     * then refines it once per dimension of the scheme: for each dimension a new map is built in
     * which every path of the previous round is re-keyed by {@code previousId + "|" + value}.
     *
     * <p>A path for which a dimension value cannot be extracted is left out of the result. It is
     * reported through the rate-limited logger unless {@code scheme.getIgnoreNonMatchingFile()}
     * is true.
     *
     * @param inputPaths paths to dispatch
     * @return the partitions of the last round; when the scheme has no dimension, the single
     *         initial pseudo-partition
     * @throws CodedRuntimeException if a time dimension's mapped period has no placeholder in the
     *         pattern (raised by {@code computeTimePartitionHandler}, outside the per-file catch)
     */
    public List<FilePartition> getPartitionsFromFiles(List<FSPath> inputPaths) {
        String pathPattern = this.escapePattern(this.filePathPattern);
        HashMap<Object, FilePartition> partitions = new HashMap<Object, FilePartition>();
        // Seed: one partition whose first bucket contains every input path.
        FilePartition fp = new FilePartition(this.scheme);
        fp.buckets.add(new FileBucket().withPaths(inputPaths));
        partitions.put("NP", fp);
        try (LimitedLogContext noMatchWarnings = LimitedLogFactory.get((DKULogger)logger, (String)"filePartitioner.noMatch", (Level)Level.WARN);){
            for (String dimName : this.scheme.getDimensionNames()) {
                Dimension dimension = this.scheme.getDimension(dimName);
                // Each round reads the previous map and writes a fresh one.
                HashMap<Object, FilePartition> prevParts = partitions;
                partitions = new HashMap();
                if (dimension instanceof TimeDimension) {
                    TimeDimension tpd = (TimeDimension)dimension;
                    TimePartitionHandler timePartitionHandler = this.computeTimePartitionHandler(tpd, pathPattern);
                    for (FilePartition prevPart : prevParts.values()) {
                        String prevPartId = prevPart.id();
                        // Only the first bucket of the previous partition is read.
                        for (FSPath path : prevPart.buckets.get((int)0).paths) {
                            assert (PathUtils.isLNT((String)path.path()));
                            try {
                                TimeDimensionValue timeDim = new TimeDimensionValue(tpd);
                                // No break before YEAR: each case falls through to the coarser
                                // periods, so e.g. HOUR also sets day, month and year.
                                switch (tpd.mappedPeriod) {
                                    case HOUR: {
                                        timeDim.setHour(this.getInteger(path.path(), timePartitionHandler.hour));
                                    }
                                    case DAY: {
                                        timeDim.setDay(this.getInteger(path.path(), timePartitionHandler.day));
                                    }
                                    case MONTH: {
                                        timeDim.setMonth(this.getInteger(path.path(), timePartitionHandler.month));
                                    }
                                    case YEAR: {
                                        timeDim.setYear(this.getInteger(path.path(), timePartitionHandler.year));
                                        break;
                                    }
                                    default: {
                                        throw new Error("Never Reached");
                                    }
                                }
                                String newId = prevPartId + "|" + timeDim.id();
                                FilePartition p = (FilePartition)((Object)partitions.get(newId));
                                if (p == null) {
                                    // NOTE(review): the path is added to bucket 0 right after a new
                                    // bucket is appended, so this relies on what copy() does with
                                    // the buckets of prevPart - confirm against FilePartition.copy().
                                    p = (FilePartition)prevPart.copy();
                                    p.buckets.add(new FileBucket());
                                    p.setDimensionValue(dimension.getName(), (DimensionValue)timeDim);
                                    partitions.put(newId, p);
                                }
                                p.buckets.get((int)0).paths.add(path);
                            }
                            catch (Exception e) {
                                // Any failure (typically getInteger rejecting the path) drops the file.
                                if (this.scheme.getIgnoreNonMatchingFile()) continue;
                                noMatchWarnings.log("Can't match file '" + path.path() + "' to a partition: " + e.getMessage());
                            }
                        }
                    }
                    continue;
                }
                // Any other dimension: build one regex where this dimension is the named group
                // "dim" and every other placeholder is a non-capturing fragment.
                String usedPattern = pathPattern;
                String mdhPattern = this.isSeparatorsBtwTimePeriods(usedPattern) ? "[0-9]{1,2}" : "[0-9]{2,2}";
                usedPattern = usedPattern.replaceAll("%\\(([^%]+?)\\)\\{" + Pattern.quote(dimension.getName()) + "\\}", "(?<dim>$1)");
                usedPattern = usedPattern.replace("%{" + dimension.getName() + "}", "(?<dim>[^/]*)");
                usedPattern = this.replaceExactValueDimensionsByPattern(usedPattern);
                usedPattern = usedPattern.replaceAll("%[MDH]", mdhPattern);
                usedPattern = usedPattern.replaceAll("%0[MDH]", "[0-9]{2,2}");
                usedPattern = usedPattern.replaceAll("%_[MDH]", "[0-9]{1,2}");
                usedPattern = usedPattern.replaceAll("%Y", "[0-9]{4,4}");
                Pattern pattern = this.compilePattern(usedPattern);
                for (FilePartition prevPart : prevParts.values()) {
                    String prevPartId = prevPart.id();
                    for (FSPath path : prevPart.buckets.get((int)0).paths) {
                        assert (PathUtils.isLNT((String)path.path()));
                        try {
                            Matcher matcher = pattern.matcher(path.path());
                            // The result of matches() is not checked: group("dim") throws when
                            // the match failed, which is handled by the catch below.
                            matcher.matches();
                            String value = matcher.group("dim");
                            String newId = prevPartId + "|" + value;
                            FilePartition p = (FilePartition)((Object)partitions.get(newId));
                            if (p == null) {
                                p = (FilePartition)prevPart.copy();
                                p.buckets.add(new FileBucket());
                                p.setDimensionValue(dimension.getName(), (DimensionValue)new ExactValueDimensionValue(value));
                                partitions.put(newId, p);
                            }
                            p.buckets.get((int)0).paths.add(path);
                        }
                        catch (Exception e) {
                            // Non-matching file: dropped, and logged unless the scheme says to ignore it.
                            if (this.scheme.getIgnoreNonMatchingFile()) continue;
                            noMatchWarnings.log("Can't match file " + path.path() + " to a partition: " + e.getMessage() + " " + pathPattern);
                        }
                    }
                }
            }
        }
        return Lists.newArrayList(partitions.values());
    }

    /**
     * Compiles a regex built from the file path pattern. This implementation uses
     * {@link Pattern#compile(String)} with no flags.
     *
     * NOTE(review): being protected, this looks like an extension point for subclasses - confirm.
     *
     * @param regex regular expression to compile
     * @return the compiled pattern
     */
    protected Pattern compilePattern(String regex) {
        return Pattern.compile(regex);
    }

    /**
     * Extracts the integer captured by the named group {@code dim} when {@code path} fully matches
     * {@code pattern}.
     *
     * @param path    file path to match
     * @param pattern pattern expected to contain a {@code dim} named group
     * @return the captured value parsed as a decimal integer
     * @throws IllegalArgumentException if the path does not match or the pattern has no capturing group
     */
    private int getInteger(String path, Pattern pattern) {
        Matcher matcher = pattern.matcher(path);
        if (matcher.matches() && matcher.groupCount() != 0) {
            return Integer.parseInt(matcher.group("dim"));
        }
        throw new IllegalArgumentException("File '" + path + "' does not match pattern '" + pattern.pattern() + "'");
    }

    /**
     * Keeps the partitions of {@code input} that are selected by {@code filter}.
     *
     * <p>Without a filter, or with a filter that has no partitions clause, every input partition is
     * kept and {@code needRefilter} stays {@code false}. Otherwise a partition is kept (once) when
     * its id equals the id of any partition in the clause, and {@code needRefilter} is set to
     * {@code true}.
     *
     * <p>Every kept element is cast to {@link FilePartition} individually, in both branches. This
     * replaces a bulk {@code addAll} of a {@code List<? extends Partition>} into a
     * {@code List<FilePartition>}, which does not type-check.
     *
     * @param input  partitions to filter; each kept element must be a {@code FilePartition}
     * @param filter input filter; may be {@code null}
     * @return the filtering result
     * @throws ClassCastException if a kept element is not a {@code FilePartition}
     */
    public ResolvedFilesFilterResult filterPartitionList(List<? extends Partition> input, InputFilter filter) {
        ResolvedFilesFilterResult result = new ResolvedFilesFilterResult();
        List<FilePartition> out = new ArrayList<FilePartition>();
        if (filter == null || filter.getPartitionsClause() == null) {
            for (Partition partition : input) {
                out.add((FilePartition)partition);
            }
        } else {
            for (Partition partition : input) {
                for (Partition toSelect : filter.getPartitionsClause()) {
                    if (partition.id().equals(toSelect.id())) {
                        out.add((FilePartition)partition);
                        // Stop at the first hit so a partition is never added twice.
                        break;
                    }
                }
            }
            result.needRefilter = true;
        }
        result.matchingFilePartitions = out;
        return result;
    }

    /**
     * Guesses column names from path segments shaped like {@code name=%Y}.
     *
     * <p>The pattern is split on {@code /}. Each segment of the form {@code <name>=%<X>}, where
     * {@code <X>} is {@code Y}, {@code M}, {@code D} or {@code H} optionally preceded by {@code _}
     * or {@code 0}, maps the matching period to {@code <name>}. When a period appears in several
     * segments, the last one wins.
     *
     * @param pattern file path pattern
     * @return a mutable map from period to guessed column name; empty when nothing matches
     */
    public static Map<TimeDimension.Period, String> guessTimePeriodColumnNames(String pattern) {
        Map<TimeDimension.Period, String> namesByPeriod = new HashMap<TimeDimension.Period, String>();
        Pattern segmentRegex = Pattern.compile("^(.*)=%[_0]?(Y|M|D|H)$");
        for (String segment : pattern.split("/")) {
            Matcher segmentMatcher = segmentRegex.matcher(segment);
            if (!segmentMatcher.matches()) {
                continue;
            }
            String letter = segmentMatcher.group(2);
            TimeDimension.Period period;
            if ("Y".equals(letter)) {
                period = TimeDimension.Period.YEAR;
            } else if ("M".equals(letter)) {
                period = TimeDimension.Period.MONTH;
            } else if ("D".equals(letter)) {
                period = TimeDimension.Period.DAY;
            } else if ("H".equals(letter)) {
                period = TimeDimension.Period.HOUR;
            } else {
                continue;
            }
            namesByPeriod.put(period, segmentMatcher.group(1));
        }
        return namesByPeriod;
    }

    /**
     * Holder for the four regexes used to read a time dimension value out of a file path.
     * Each field is filled by {@code computeTimePartitionHandler} with the result of
     * {@code setMatcher} for the corresponding period.
     */
    private static class TimePartitionHandler {
        // Pattern produced by setMatcher for Period.YEAR.
        Pattern year;
        // Pattern produced by setMatcher for Period.MONTH.
        Pattern month;
        // Pattern produced by setMatcher for Period.DAY.
        Pattern day;
        // Pattern produced by setMatcher for Period.HOUR.
        Pattern hour;

        // Private: only the enclosing class creates instances.
        private TimePartitionHandler() {
        }
    }

    /**
     * Outcome of {@code filterPartitionList}: the partitions that were kept, plus a flag set when a
     * partitions clause was applied.
     */
    public static class ResolvedFilesFilterResult {
        /** Partitions retained by the filter; empty until assigned. */
        public List<FilePartition> matchingFilePartitions = new ArrayList<FilePartition>();
        /** {@code true} when the result was restricted by a partitions clause. */
        public boolean needRefilter;

        /**
         * Collects the paths of every retained partition.
         *
         * @return a new list concatenating {@code getAllPaths()} of each partition, in order
         */
        public List<FSPath> getAllPaths() {
            List<FSPath> allPaths = new ArrayList<FSPath>();
            for (FilePartition partition : this.matchingFilePartitions) {
                allPaths.addAll(partition.getAllPaths());
            }
            return allPaths;
        }

        /**
         * Sums the size of every retained partition.
         *
         * @return the sum of {@code getTotalSize()} over the partitions; {@code 0} when there are none
         */
        public long getTotalSize() {
            long totalSize = 0L;
            for (FilePartition partition : this.matchingFilePartitions) {
                totalSize += partition.getTotalSize();
            }
            return totalSize;
        }
    }
}

