/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.datasets.fs.FSDatasetUtils;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.input.formats.FileBasedFormatDetector;
import com.dataiku.dip.input.formats.InputFormatsDetector;
import com.dataiku.dip.input.formats.XmlFormatExtractor;
import com.dataiku.dip.input.stream.EnrichedInputStream;
import com.dataiku.dss.shadelib.org.apache.commons.io.input.CountingInputStream;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

public class XmlFormatDetector
implements FileBasedFormatDetector {
    private static long detectionLimit = 10000000L;

    private PathSniffer sniff(FSDatasetUtils.PathsBasedSplit split, FSPath path, String charsetOverride) throws Exception {
        EnrichedInputStream eis = split.getStreamForPath(path);
        InputStream xmlStream = eis.decompressedHeadStream(detectionLimit);
        SAXParserFactory parserFactory = XmlFormatExtractor.getSAXParserFactory();
        SAXParser parser = parserFactory.newSAXParser();
        CountingInputStream countingXmlStream = new CountingInputStream(xmlStream);
        InputSource xmlSource = new InputSource((InputStream)countingXmlStream);
        if (charsetOverride != null) {
            xmlSource.setEncoding(charsetOverride);
        }
        PathSniffer sniffer = new PathSniffer(countingXmlStream, charsetOverride);
        try {
            parser.parse(xmlSource, (DefaultHandler)sniffer);
        }
        catch (SAXParseException e) {
            if (charsetOverride == null) {
                return this.sniff(split, path, "utf-8");
            }
        }
        catch (SAXException sAXException) {
            // empty catch block
        }
        return sniffer;
    }

    private JSONArray getPossibleXPaths(PathSniffer sniffer) throws ParserConfigurationException, SAXNotRecognizedException, SAXNotSupportedException, SAXException, IOException, JSONException {
        JSONArray possibleXPaths = new JSONArray();
        for (int i = 0; i < sniffer.pathCounts.size(); ++i) {
            Map<String, Integer> pathCountsByDepth = sniffer.pathCounts.get(i);
            ArrayList candidates = Lists.newArrayList();
            for (Map.Entry<String, Integer> pathWithCount : pathCountsByDepth.entrySet()) {
                JSONObject pathCountPair = new JSONObject();
                pathCountPair.put("xpath", (Object)pathWithCount.getKey());
                pathCountPair.put("count", (Object)pathWithCount.getValue());
                pathCountPair.put("depth", i);
                JSONArray attributesFound = new JSONArray();
                for (String att : sniffer.attributes.get(pathWithCount.getKey())) {
                    attributesFound.put((Object)att);
                }
                pathCountPair.put("attributes", (Object)attributesFound);
                candidates.add(pathCountPair);
            }
            Collections.sort(candidates, new Comparator<JSONObject>(){

                @Override
                public int compare(JSONObject a, JSONObject b) {
                    int countDiff = 0;
                    try {
                        countDiff = b.getInt("count") - a.getInt("count");
                        return countDiff != 0 ? countDiff : a.getString("xpath").compareTo(b.getString("xpath"));
                    }
                    catch (JSONException jSONException) {
                        return 0;
                    }
                }
            });
            for (JSONObject candidate : candidates) {
                possibleXPaths.put((Object)candidate);
            }
        }
        return possibleXPaths;
    }

    @Override
    public Map<String, String> recomputeMetadata(String autodetectedFormatId, Dataset dataset, FSDatasetUtils.PathsBasedSplit split, FSPath path, String filename) throws Exception {
        if (autodetectedFormatId.equals("xml")) {
            JSONArray possibleXPaths = this.getPossibleXPaths(this.sniff(split, path, null));
            HashMap meta = Maps.newHashMap();
            meta.put("possibleXPaths", possibleXPaths.toString());
            return meta;
        }
        return null;
    }

    @Override
    public List<InputFormatsDetector.FormatWithMetadata> detect(Dataset dataset, FSDatasetUtils.PathsBasedSplit split, FSPath path, String filename) throws Exception {
        InputFormatsDetector.FormatWithMetadata fwm = new InputFormatsDetector.FormatWithMetadata();
        XmlFormatExtractor.Config config = new XmlFormatExtractor.Config();
        fwm.type = "xml";
        try {
            PathSniffer sniffer = this.sniff(split, path, null);
            JSONArray possibleXPaths = this.getPossibleXPaths(sniffer);
            fwm.metadata.put("possibleXPaths", possibleXPaths.toString());
            config.rootPath = null;
            if (sniffer.charset != null) {
                config.overrideFileCharset = true;
                config.charset = sniffer.charset;
            }
            for (int i = 0; i < possibleXPaths.length(); ++i) {
                if (((JSONObject)possibleXPaths.get(i)).getInt("count") <= 1) continue;
                config.rootPath = ((JSONObject)possibleXPaths.get(i)).getString("xpath");
                break;
            }
        }
        catch (Exception exception) {
            // empty catch block
        }
        fwm.params = config;
        return Lists.newArrayList((Object[])new InputFormatsDetector.FormatWithMetadata[]{fwm});
    }

    private class PathSniffer
    extends DefaultHandler {
        private final List<Map<String, Integer>> pathCounts = Lists.newArrayList();
        private final Map<String, Set<String>> attributes = Maps.newHashMap();
        private final Stack<String> elements = new Stack();
        private final CountingInputStream countingXmlStream;
        private boolean couldBeLeafNode;
        private final String charset;

        public PathSniffer(CountingInputStream countingXmlStream, String charset) {
            this.countingXmlStream = countingXmlStream;
            this.charset = charset;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            if (this.countingXmlStream.getByteCount() > detectionLimit) {
                throw new SAXException("Limit for detection reached");
            }
            this.elements.push(qName);
            String xpath = "/" + Joiner.on((String)"/").join(this.elements);
            if (!this.attributes.containsKey(xpath)) {
                this.attributes.put(xpath, new HashSet());
            }
            Set<String> attributeNames = this.attributes.get(xpath);
            for (int i = 0; i < atts.getLength(); ++i) {
                String attName = atts.getQName(i);
                attributeNames.add(attName);
            }
            this.couldBeLeafNode = atts.getLength() == 0;
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (this.elements.size() > 0) {
                if (!this.couldBeLeafNode) {
                    Map<String, Integer> pathCountsByDepth;
                    String xpath = "/" + Joiner.on((String)"/").join(this.elements);
                    int depth = this.elements.size();
                    for (int i = this.pathCounts.size(); i <= depth; ++i) {
                        this.pathCounts.add(new HashMap());
                    }
                    pathCountsByDepth.put(xpath, ((pathCountsByDepth = this.pathCounts.get(depth)).containsKey(xpath) ? pathCountsByDepth.get(xpath) : 0) + 1);
                }
                this.elements.pop();
            }
            this.couldBeLeafNode = false;
        }
    }
}

