/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.datasets.fs;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.ProxySettings;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.dataflow.ComputableHashComputer;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.DatasetCodes;
import com.dataiku.dip.datasets.DatasetHandler;
import com.dataiku.dip.datasets.DatasetReadiness;
import com.dataiku.dip.datasets.FSProviderCodes;
import com.dataiku.dip.datasets.StreamableDatasetSelection;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.datasets.fs.AbstractFSLikeDatasetHandler;
import com.dataiku.dip.datasets.fs.BuiltinFSDatasets;
import com.dataiku.dip.datasets.fs.FSDatasetUtils;
import com.dataiku.dip.datasets.fs.HTTPDatasetTestHandler;
import com.dataiku.dip.datasets.fs.HTTPFSProvider;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.CodedIOException;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.fs.FSEnumerationSettings;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.fs.FSPathOrDirectory;
import com.dataiku.dip.input.DatasetTestHandler;
import com.dataiku.dip.input.filter.FilterResultWithSplits;
import com.dataiku.dip.input.filter.InputFilter;
import com.dataiku.dip.input.stream.EnrichedInputStream;
import com.dataiku.dip.input.stream.StreamsInputSplit;
import com.dataiku.dip.input.utils.CountingProcessorOutput;
import com.dataiku.dip.output.Output;
import com.dataiku.dip.partitioning.FilePartitioner;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.partitioning.PartitionFactory;
import com.dataiku.dip.partitioning.PartitioningScheme;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.utils.DKUtils;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

public class HTTPDatasetHandler
extends AbstractFSLikeDatasetHandler {
    /** Dataset parameters, cast to their HTTP-specific type once in the constructor. */
    private final BuiltinFSDatasets.HTTPDatasetConfig config;
    /** Provider used for every stat / enumerate / read call of this handler; closed by {@link #close()}. */
    private final HTTPFSProvider provider;
    /** Shared logger; it is never reassigned, so it is declared final. */
    private static final Logger logger = Logger.getLogger((String)"dku.input.http");

    /**
     * Creates a handler for the given HTTP dataset.
     *
     * <p>The dataset parameters are cast to {@link BuiltinFSDatasets.HTTPDatasetConfig} and an
     * {@link HTTPFSProvider} is created from the flags found in that configuration.
     *
     * @param authCtx authentication context, forwarded to the superclass and to the provider
     * @param dataset dataset whose parameters must be a {@link BuiltinFSDatasets.HTTPDatasetConfig}
     */
    public HTTPDatasetHandler(AuthCtx authCtx, Dataset dataset) {
        super(authCtx, dataset);
        assert (dataset.getParams() instanceof BuiltinFSDatasets.HTTPDatasetConfig);
        this.config = (BuiltinFSDatasets.HTTPDatasetConfig)dataset.getParams();
        // Read the global proxy settings a single time: checking for null on one call and using
        // the result of a second call would let the two reads disagree.
        ProxySettings proxySettings = this.config.useGlobalProxy ? ApplicationConfigurator.getProxySettings() : null;
        if (proxySettings == null) {
            // Global proxy not requested, or none configured: fall back to default settings
            proxySettings = new ProxySettings();
        }
        this.provider = new HTTPFSProvider(authCtx, null, this.config.consider404AsEmpty, this.config.fallbackHeadToGet, this.config.trustAnySSLCertificate, proxySettings);
    }

    /**
     * Builds the split used when no partition filtering is requested.
     *
     * <p>Partition choice: none for a non-partitioned dataset; the preview partition when the
     * configured partition list is empty; otherwise the "ALL" partition.
     *
     * @return the split produced by {@link #getPartitionSplit(Partition)} for that partition
     * @throws CodedException propagated from {@link #getPartitionSplit(Partition)}
     */
    public InputSplit getSingleSplit() throws CodedException {
        Partition target;
        if (!this.dataset.getPartitioningSchema().isPartitioned()) {
            target = null;
        } else if (this.config.partitions.isEmpty()) {
            // No explicit partition list: rely on the preview partition
            target = PartitionFactory.fromIdentifier(this.dataset.getPartitioningSchema(), this.config.previewPartition);
        } else {
            target = Partition.newALL((PartitioningScheme)this.dataset.getPartitioningSchema());
        }
        return this.getPartitionSplit(target);
    }

    /**
     * Builds a split restricted to the first source that is not known to be empty.
     *
     * @return a single-URL split described by that URL, or an empty split when
     *         {@link #getFirstNonEmptyPath(Partition)} finds nothing
     * @throws CodedIOException propagated from the path lookup
     * @throws CodedException propagated from the path lookup
     */
    public InputSplit getSampleSplit() throws CodedIOException, CodedException {
        FSPath firstNonEmpty = this.getFirstNonEmptyPath(null);
        if (firstNonEmpty != null) {
            // Drop the first character of the path to get the URL back
            String sampleUrl = firstNonEmpty.path().substring(1);
            return new InputSplit(sampleUrl);
        }
        return new InputSplit();
    }

    /**
     * Closes the underlying {@link HTTPFSProvider}.
     *
     * @throws IOException if closing the provider fails
     */
    @Override
    public void close() throws IOException {
        this.provider.close();
    }

    /**
     * @return the shared descriptor {@code BuiltinFSDatasets.HTTP_META}
     */
    @Override
    public DatasetHandler.DatasetMeta<?, ?> getMeta() {
        return BuiltinFSDatasets.HTTP_META;
    }

    /**
     * Validates that every source URL parses as a URI and uses the {@code http} or
     * {@code https} scheme (case-insensitive).
     *
     * <p>For a partitioned dataset the URLs are resolved against the partition returned by
     * {@code Partition.randomSamplePartition}. Nothing is checked when there are no sources.
     *
     * @throws CodedException with {@code ERR_FSPROVIDER_HTTP_INVALID_URI} on the first URL that
     *         cannot be parsed or has another scheme
     */
    @Override
    public void checkConfiguration() throws CodedException {
        Partition samplePartition = null;
        if (this.dataset.getPartitioningSchema().isPartitioned()) {
            samplePartition = Partition.randomSamplePartition((PartitioningScheme)this.dataset.getPartitioningSchema());
        }
        List<BuiltinFSDatasets.HTTPSource> candidates = this.getSources(samplePartition);
        if (candidates == null) {
            return;
        }
        for (BuiltinFSDatasets.HTTPSource candidate : candidates) {
            URI parsed;
            try {
                parsed = new URI(candidate.url);
            }
            catch (URISyntaxException syntaxError) {
                throw new CodedException((InfoMessage.MessageCode)FSProviderCodes.ERR_FSPROVIDER_HTTP_INVALID_URI, "Incorrect URL: " + candidate.url, (Throwable)syntaxError);
            }
            boolean isHttpLike = "http".equalsIgnoreCase(parsed.getScheme()) || "https".equalsIgnoreCase(parsed.getScheme());
            if (!isHttpLike) {
                throw new CodedException((InfoMessage.MessageCode)FSProviderCodes.ERR_FSPROVIDER_HTTP_INVALID_URI, "HTTP dataset only supports HTTP(S) URLs, got " + candidate.url);
            }
        }
    }

    /**
     * Suggests a dataset name derived from the first configured source URL.
     *
     * <p>The last path segment is used, with one well-known file extension removed; when the URL
     * has no usable path segment the host is used instead. Dots and dashes are replaced by
     * underscores in the result.
     *
     * @return the suggested name, or {@code null} when there is no source, no usable path segment
     *         or host, or when anything goes wrong while computing the name
     */
    @Override
    public String suggestName() {
        try {
            if (this.config.sources == null || this.config.sources.isEmpty()) {
                return null;
            }
            String url = this.config.sources.get(0).url;
            if (this.dataset.getPartitioningSchema().isPartitioned()) {
                url = FilePartitioner.substitutePartitionIdentifierInPathForSuggestedName(url, this.dataset.getPartitioningSchema());
            }
            URI uri = new URI(url);
            String candidateName = null;
            String[] pathElements = StringUtils.defaultIfBlank(uri.getPath(), "").split("/");
            if (pathElements.length > 0) {
                candidateName = pathElements[pathElements.length - 1];
                int lastDot = candidateName.lastIndexOf('.');
                if (lastDot > 0) {
                    String extension = candidateName.substring(lastDot + 1);
                    // "xls"/"xlsx" are the spreadsheet extensions; "xsl"/"xslx" are kept so that
                    // names that were stripped before are still stripped
                    if (Sets.newHashSet("zip", "gz", "gzip", "bz", "bz2", "csv", "txt", "text", "log", "xls", "xlsx", "xsl", "xslx").contains(extension)) {
                        candidateName = candidateName.substring(0, lastDot);
                    }
                }
            }
            // "".split("/") yields a single empty element, so a URL without a path would produce
            // an empty name: fall back to the host in that case as well
            if (StringUtils.isBlank(candidateName)) {
                candidateName = uri.getHost();
            }
            if (candidateName == null) {
                // Neither a path segment nor a host is available
                return null;
            }
            return candidateName.replace('.', '_').replace('-', '_');
        }
        catch (Exception e) {
            // Name suggestion is best-effort: report and give up rather than fail the caller
            logger.info((Object)"Could not suggest name", (Throwable)e);
            return null;
        }
    }

    /**
     * Lists the partitions declared in the dataset configuration.
     *
     * @return for a partitioned dataset, one partition per non-blank configured identifier;
     *         otherwise a singleton list holding {@code Partition.newNP()}
     */
    @Override
    public List<Partition> listPartitions() {
        if (!this.dataset.getPartitioningSchema().isPartitioned()) {
            return Collections.singletonList(Partition.newNP());
        }
        ArrayList<Partition> declared = new ArrayList<Partition>(this.config.partitions.size());
        for (String identifier : this.config.partitions) {
            if (!StringUtils.isBlank((String)identifier)) {
                declared.add(PartitionFactory.fromIdentifier(this.dataset.getPartitioningSchema(), identifier));
            }
        }
        return declared;
    }

    /**
     * Counts the records of the whole dataset.
     *
     * @return the sum of {@link #getPartitionRecords(Partition)} over {@link #listPartitions()}
     *         for a partitioned dataset, or {@code getPartitionRecords(null)} otherwise
     * @throws Exception propagated from the per-partition count
     */
    @Override
    public long getRecords() throws Exception {
        if (!this.dataset.getPartitioningSchema().isPartitioned()) {
            return this.getPartitionRecords(null);
        }
        long recordCount = 0L;
        for (Partition partition : this.listPartitions()) {
            recordCount += this.getPartitionRecords(partition);
        }
        return recordCount;
    }

    /**
     * Counts records by pushing the data through a {@link CountingProcessorOutput}.
     *
     * @param p partition to count; {@code null}, an NP/ALL partition, or one whose id is
     *          {@code "NP"} or {@code "ALL"} makes the push run without a partition selection
     * @return the number of rows counted, or 0 when there are no sources
     * @throws Exception propagated from source resolution or from the push
     */
    @Override
    public long getPartitionRecords(Partition p) throws Exception {
        if (this.getSources(p) == null) {
            return 0L;
        }
        CountingProcessorOutput rowCounter = new CountingProcessorOutput();
        boolean unrestricted = p == null || p.isNP() || p.isAll() || "NP".equals(p.id()) || "ALL".equals(p.id());
        if (!unrestricted) {
            // Restrict the push to the requested partition only
            StreamableDatasetSelection onlyThisPartition = new StreamableDatasetSelection().withSelectedPartitionIds(Lists.newArrayList((Object[])new String[]{p.id()}));
            UniversalSingleThreadPusher.push(this.authCtx, this.dataset, onlyThisPartition, (ProcessorOutput)rowCounter, (ColumnFactory)new StreamColumnFactory(), (RowFactory)new StreamRowFactory());
        } else {
            UniversalSingleThreadPusher.push(this.authCtx, this.dataset, (ProcessorOutput)rowCounter, (ColumnFactory)new StreamColumnFactory(), (RowFactory)new StreamRowFactory());
        }
        return rowCounter.getCount();
    }

    /**
     * Computes the readiness of a partition from the size and last-modified time of its sources.
     *
     * <p>Each source is stat'ed through the provider. A missing source makes the dataset not
     * ready unless {@code consider404AsEmpty} is set, in which case it counts as empty. When the
     * dataset parameters ask for "not ready if empty", the partition is additionally reported as
     * not ready if all sources have size 0 or if reading up to 2 records yields none.
     *
     * @param p partition to check; may be {@code null}, in which case it is labelled {@code "NP"}
     *          in messages and no partition selection is applied to the emptiness read
     * @param session not used by this implementation
     * @return ready (with an MD5 over the per-source "url=size@lastModified" lines), not ready,
     *         or an error readiness wrapping any exception raised along the way
     */
    @Override
    public DatasetReadiness getReadiness(Partition p, @Nullable ComputableHashComputer.ReadinessComputationSession session) {
        try {
            List<BuiltinFSDatasets.HTTPSource> sources = this.getSources(p);
            if (sources == null) {
                return DatasetReadiness.notReady(new CodedException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_CONFIG, "Sources are empty"));
            }
            boolean allEmpty = true;
            // One "url=size@lastModified" line per source; hashed at the end
            StringBuilder sb = new StringBuilder();
            for (BuiltinFSDatasets.HTTPSource source : sources) {
                FSPathOrDirectory path = this.provider.stat(source.url);
                sb.append(source.url).append('=');
                if (path == null) {
                    if (!this.config.consider404AsEmpty) {
                        return DatasetReadiness.notReady(new IOException("Error 404 (Not Found) while fetching " + source.url));
                    }
                    // Missing source tolerated: recorded as size 0, modified at 0
                    sb.append("0@0\n");
                    continue;
                }
                sb.append(path.getSize()).append('@').append(path.getLastModified()).append('\n');
                allEmpty = allEmpty && path.getSize() == 0L;
            }
            if (this.dataset.getParams().isNotReadyIfEmpty()) {
                // getSources() accepts a null partition, so p may legitimately be null here;
                // dereferencing it would turn the check into an opaque NullPointerException
                String partitionId = p == null ? "NP" : p.id();
                String emptyMessage = "Input partition " + partitionId + " of dataset " + this.dataset.getFullName() + " is empty";
                logger.info((Object)("Checking whether partition " + partitionId + " is empty"));
                if (allEmpty) {
                    return DatasetReadiness.notReady((Throwable)new CodedIOException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_PARTITION_EMPTY, emptyMessage));
                }
                // Sizes alone are not conclusive: try to read a couple of records
                CountingProcessorOutput counting = new CountingProcessorOutput();
                StreamableDatasetSelection selection = StreamableDatasetSelection.head100K();
                if (p != null) {
                    selection.withSelectedPartitions(Lists.newArrayList((Object[])new Partition[]{p}));
                }
                selection.maxRecords = 2L;
                UniversalSingleThreadPusher.push(this.authCtx, this.dataset, selection, (ProcessorOutput)counting, (ColumnFactory)new StreamColumnFactory(), (RowFactory)new StreamRowFactory());
                if (counting.getCount() == 0L) {
                    return DatasetReadiness.notReady((Throwable)new CodedIOException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_PARTITION_EMPTY, emptyMessage));
                }
            }
            return DatasetReadiness.ready(DKUtils.md5Base64((String)sb.toString()));
        }
        catch (Exception e) {
            return DatasetReadiness.error(e);
        }
    }

    /**
     * Reports whether {@link #getFirstNonEmptyPath(Partition)} finds a path for the partition.
     *
     * @param p partition to look up
     * @return {@code true} when a path is returned, {@code false} when the lookup yields {@code null}
     * @throws Exception propagated from the lookup
     */
    @Override
    public boolean partitionExists(Partition p) throws Exception {
        return this.getFirstNonEmptyPath(p) != null;
    }

    /**
     * @return a new {@link HTTPDatasetTestHandler} constructed from this handler and its dataset
     * @throws IOException declared by the overridden signature
     */
    @Override
    public DatasetTestHandler buildTestHandler() throws IOException {
        return new HTTPDatasetTestHandler(this, this.dataset);
    }

    /**
     * Builds the split that reads every source URL resolved for a partition.
     *
     * @param partition partition to read; may be {@code null}
     * @return an empty split when there are no sources; otherwise a split over the resolved URLs,
     *         described as {@code "HTTP:all"} for a {@code null} or NP partition and
     *         {@code "HTTP:" + id} for any other
     * @throws CodedException propagated from source resolution
     */
    public InputSplit getPartitionSplit(Partition partition) throws CodedException {
        List<BuiltinFSDatasets.HTTPSource> resolved = this.getSources(partition);
        if (resolved == null) {
            return new InputSplit();
        }
        String description;
        if (partition == null || partition.isNP()) {
            description = "HTTP:all";
        } else {
            description = "HTTP:" + partition.id();
        }
        ArrayList<String> targets = new ArrayList<String>(resolved.size());
        for (BuiltinFSDatasets.HTTPSource entry : resolved) {
            targets.add(entry.url);
        }
        return new InputSplit(targets, description);
    }

    /**
     * Translates an input filter into the splits to read.
     *
     * <p>With partition filtering, one split is added per partition of the filter's clause;
     * without it, the single split of {@link #getSingleSplit()} is used. The result asks for a
     * re-filter whenever the filter carries an enabled row filter.
     *
     * @param filter filter to translate
     * @return the splits plus the re-filter flag
     * @throws CodedException propagated from split construction
     */
    @Override
    public FilterResultWithSplits getFilterSplits(InputFilter filter) throws CodedException {
        FilterResultWithSplits result = new FilterResultWithSplits();
        if (!filter.hasPartitionsFiltering()) {
            result.withSplit((com.dataiku.dip.input.InputSplit)this.getSingleSplit());
        } else {
            for (Partition selected : filter.getPartitionsClause()) {
                result.withSplit((com.dataiku.dip.input.InputSplit)this.getPartitionSplit(selected));
            }
        }
        boolean rowFilterEnabled = filter.getFilter() != null && filter.getFilter().enabled;
        result.setNeedsRefilter(rowFilterEnabled);
        return result;
    }

    /**
     * Resolves the configured source URLs for a partition.
     *
     * <ul>
     *   <li>{@code null} partition: the first entry of {@link #listPartitions()} is used.</li>
     *   <li>ALL partition on a partitioned dataset: the sources of every listed partition are
     *       concatenated (possibly into an empty list).</li>
     *   <li>Non-partitioned dataset: the configured sources are returned as they are.</li>
     *   <li>Otherwise: a new source is created per configured one, with the partition
     *       identifier resolved in its URL.</li>
     * </ul>
     *
     * @param partition partition to resolve for, or {@code null}
     * @return the resolved sources, or {@code null} when a single partition is resolved and no
     *         source is configured
     * @throws CodedException when {@code partition} is {@code null} and the partition list is empty
     */
    @Nullable
    private List<BuiltinFSDatasets.HTTPSource> getSources(@Nullable Partition partition) throws CodedException {
        Partition p;
        if (partition == null) {
            List<Partition> partitions = this.listPartitions();
            if (partitions.isEmpty()) {
                throw new CodedException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_CONFIG, "Partition list is empty but dataset is partitioned");
            }
            p = partitions.get(0);
        } else if (partition.isAll() && this.dataset.getPartitioningSchema().isPartitioned()) {
            List<Partition> partitions = this.listPartitions();
            // The configured list may be null (it is null-checked below and in suggestName());
            // only its size is needed here, as a capacity hint
            int sourcesPerPartition = this.config.sources == null ? 0 : this.config.sources.size();
            ArrayList<BuiltinFSDatasets.HTTPSource> sources = new ArrayList<BuiltinFSDatasets.HTTPSource>(sourcesPerPartition * partitions.size());
            for (Partition p2 : partitions) {
                List<BuiltinFSDatasets.HTTPSource> pSources = this.getSources(p2);
                if (pSources == null) continue;
                sources.addAll(pSources);
            }
            return sources;
        } else {
            p = partition;
        }
        List<BuiltinFSDatasets.HTTPSource> sources = this.config.sources;
        if (sources == null || sources.isEmpty()) {
            return null;
        }
        if (!this.dataset.getPartitioningSchema().isPartitioned()) {
            return sources;
        }
        // Build fresh source objects so the configured ones keep their unresolved URL patterns
        ArrayList<BuiltinFSDatasets.HTTPSource> resolvedSources = new ArrayList<BuiltinFSDatasets.HTTPSource>(sources.size());
        for (BuiltinFSDatasets.HTTPSource source : sources) {
            BuiltinFSDatasets.HTTPSource resolved = new BuiltinFSDatasets.HTTPSource();
            resolved.url = FilePartitioner.resolvePartitionIdentifierInURL(p, this.dataset.getPartitioningSchema(), source.url);
            resolvedSources.add(resolved);
        }
        return resolvedSources;
    }

    /**
     * Finds the first source whose size is either unknown ({@code -1}) or strictly positive.
     *
     * <p>Sources are enumerated in order through the provider. A source whose prefix does not
     * exist is skipped when {@code consider404AsEmpty} is set and is an error otherwise.
     *
     * @param partition partition to inspect; when {@code null} on a partitioned dataset the
     *                  configured preview partition is used instead
     * @return the first path of the first matching source, or {@code null} when there are no
     *         sources or none matches
     * @throws CodedIOException when the preview partition is required but blank, when an
     *         enumeration fails, or when a prefix is missing and 404s are not tolerated
     * @throws CodedException propagated from source resolution
     */
    @Nullable
    private FSPath getFirstNonEmptyPath(Partition partition) throws CodedIOException, CodedException {
        Partition effective = partition;
        if (partition == null && this.dataset.getPartitioningSchema().isPartitioned()) {
            if (StringUtils.isBlank((String)this.config.previewPartition)) {
                throw new CodedIOException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_INVALID_CONFIG, "Preview partition is required");
            }
            effective = PartitionFactory.fromIdentifier(this.dataset.getPartitioningSchema(), this.config.previewPartition);
        }
        List<BuiltinFSDatasets.HTTPSource> candidates = this.getSources(effective);
        if (candidates == null) {
            return null;
        }
        for (BuiltinFSDatasets.HTTPSource candidate : candidates) {
            logger.debug((Object)("Enumerating HTTP source: " + candidate.url));
            HTTPFSProvider.HTTPFSEnumerationResult enumeration = this.provider.enumerateRecursive(candidate.url, new FSEnumerationSettings());
            if (!enumeration.isSuccessful()) {
                throw new CodedIOException((InfoMessage.MessageCode)FSProviderCodes.ERR_FSPROVIDER_HTTP_REQUEST_FAILED, "Error while fetching " + candidate.url, enumeration.getError());
            }
            if (!enumeration.enumerationPrefixExists()) {
                if (!this.config.consider404AsEmpty) {
                    throw new CodedIOException((InfoMessage.MessageCode)FSProviderCodes.ERR_FSPROVIDER_HTTP_REQUEST_FAILED_STATUS, "Error 404 (Not Found) while fetching " + candidate.url);
                }
                logger.info((Object)"Prefix does not exist (404?)");
                continue;
            }
            logger.debug((Object)"Prefix exists");
            FSPath first = enumeration.getPaths().iterator().next();
            logger.info((Object)("Path size: " + first.getSize()));
            // A size of -1 is not treated as empty; only a known size <= 0 is skipped
            boolean knownEmpty = first.getSize() != -1L && first.getSize() <= 0L;
            if (!knownEmpty) {
                return first;
            }
        }
        return null;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isManaged() {
        return false;
    }

    /** Does nothing: this handler has no pre-rename work. */
    @Override
    public void executePreRenameOperations() {
    }

    /**
     * Seeds the partition list with the preview partition right after creation.
     *
     * <p>This only happens for a partitioned dataset whose partition list is empty and whose
     * preview partition is a non-empty string.
     *
     * @return {@code true} when the configuration was modified, {@code false} otherwise
     * @throws Exception declared by the overridden signature
     */
    @Override
    public boolean executeFastPostCreateOperations() throws Exception {
        if (!this.dataset.getPartitioningSchema().isPartitioned()) {
            return false;
        }
        if (!this.config.partitions.isEmpty()) {
            return false;
        }
        if (StringUtils.isEmpty((String)this.config.previewPartition)) {
            return false;
        }
        this.config.partitions.add(this.config.previewPartition);
        return true;
    }

    /**
     * @return always {@code false}; no work is performed here
     */
    @Override
    public boolean executeSlowPostCreateOperations_NT() {
        return false;
    }

    /**
     * Builds the exception thrown by every write-related operation of this handler.
     *
     * @return a new {@link CodedException} with code {@code ERR_DATASET_ACTION_NOT_SUPPORTED}
     */
    private static CodedException cannotWrite() {
        return new CodedException((InfoMessage.MessageCode)DatasetCodes.ERR_DATASET_ACTION_NOT_SUPPORTED, "Cannot perform write operation on HTTP dataset");
    }

    /**
     * Not supported.
     *
     * @throws Exception always, the exception built by {@code cannotWrite()}
     */
    @Override
    public void createManaged() throws Exception {
        throw HTTPDatasetHandler.cannotWrite();
    }

    /**
     * Not supported.
     *
     * @throws Exception always, the exception built by {@code cannotWrite()}
     */
    @Override
    public void clearAllData() throws Exception {
        throw HTTPDatasetHandler.cannotWrite();
    }

    /**
     * Not supported.
     *
     * @param partitions ignored
     * @throws Exception always, the exception built by {@code cannotWrite()}
     */
    @Override
    public void clearPartitions(List<Partition> partitions) throws Exception {
        throw HTTPDatasetHandler.cannotWrite();
    }

    /**
     * Not supported.
     *
     * @throws Exception always, the exception built by {@code cannotWrite()}
     */
    @Override
    public void clearAllDataAndStructure() throws Exception {
        throw HTTPDatasetHandler.cannotWrite();
    }

    /**
     * Not supported: no output can be built for this handler.
     *
     * @param targetPartition ignored
     * @param targetSplit ignored
     * @param resplitFactor ignored
     * @param warningsContext ignored
     * @return never returns normally
     * @throws Exception always, the exception built by {@code cannotWrite()}
     */
    @Override
    public Output buildOutput(Partition targetPartition, int targetSplit, int resplitFactor, WarningsContext warningsContext) throws Exception {
        throw HTTPDatasetHandler.cannotWrite();
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean outputHandlesClear() {
        return false;
    }

    /**
     * @return always {@code false}
     * @throws Exception declared by the overridden signature; not thrown by this implementation
     */
    @Override
    public boolean isParallelWritable() throws Exception {
        return false;
    }

    /**
     * Split over an ordered list of URLs, read one after the other through the enclosing
     * handler's provider.
     *
     * <p>The read cursor ({@code index}) is only touched by the synchronized {@link #reset()} and
     * {@link #nextStream()} methods. A {@code null} URL list is treated as an empty split by
     * every method.
     */
    public class InputSplit
    extends StreamsInputSplit
    implements FSDatasetUtils.PathsBasedSplit {
        /** URLs to read, in order; may be {@code null}. */
        private final List<String> urls;
        /** Description returned by {@link #getDesc()}. */
        private final String desc;
        /** Position of the next URL to hand out from {@link #nextStream()}. */
        private int index = 0;

        /** Creates an empty split described as {@code "HTTP:empty"}. */
        public InputSplit() {
            this(new ArrayList<String>(0), "HTTP:empty");
        }

        /**
         * Creates a split over a single URL, which is also used as the description.
         *
         * @param url the only URL of the split
         */
        public InputSplit(String url) {
            this(Collections.singletonList(url), url);
        }

        /**
         * Creates a split over the given URLs.
         *
         * @param urls URLs to read, in order; the list is stored as-is, not copied
         * @param desc description of the split
         */
        public InputSplit(List<String> urls, String desc) {
            this.urls = urls;
            this.desc = desc;
        }

        /** @return the description given at construction */
        public String getDesc() {
            return this.desc;
        }

        /** Rewinds the cursor so that {@link #nextStream()} starts again from the first URL. */
        public synchronized void reset() {
            this.index = 0;
        }

        /**
         * Opens the stream for the next URL and advances the cursor.
         *
         * @return the stream read from the provider, or {@code null} when the cursor has reached
         *         the end of the list or the list is {@code null}
         * @throws IOException propagated from the provider
         * @throws InterruptedException propagated from the provider
         * @throws DKUSecurityException propagated from the provider
         */
        public synchronized EnrichedInputStream nextStream() throws IOException, InterruptedException, DKUSecurityException {
            if (this.urls == null || this.index == this.urls.size()) {
                return null;
            }
            return HTTPDatasetHandler.this.provider.read(this.urls.get(this.index++));
        }

        /** @return {@code true} when there is no URL to read */
        public boolean isEmpty() {
            return this.urls == null || this.urls.isEmpty();
        }

        /**
         * Exposes the URLs as paths, each one being {@code "/"} followed by the URL.
         *
         * @return a new list with one path per URL; empty when the URL list is {@code null}
         */
        @Override
        public List<FSPath> getPaths() {
            // Same null tolerance as nextStream() and isEmpty()
            if (this.urls == null) {
                return new ArrayList<FSPath>(0);
            }
            ArrayList<FSPath> paths = new ArrayList<FSPath>(this.urls.size());
            for (String url : this.urls) {
                paths.add(new FSPath("/" + url));
            }
            return paths;
        }

        /**
         * Opens the stream for a path produced by {@link #getPaths()}.
         *
         * @param p path whose first character is dropped to recover the URL
         * @return the stream read from the provider
         * @throws IOException propagated from the provider
         * @throws InterruptedException propagated from the provider
         * @throws DKUSecurityException propagated from the provider
         */
        @Override
        public EnrichedInputStream getStreamForPath(FSPath p) throws IOException, InterruptedException, DKUSecurityException {
            return HTTPDatasetHandler.this.provider.read(p.path().substring(1));
        }
    }
}

