/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.docextraction;

import com.dataiku.dip.docextraction.StructuredContent;
import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.docextraction.common.TextChunk;
import com.dataiku.dip.utils.ExceptionUtils;
import com.vladsch.flexmark.ast.HardLineBreak;
import com.vladsch.flexmark.ast.Heading;
import com.vladsch.flexmark.ast.SoftLineBreak;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Static helpers that build a {@link StructuredContent} tree from raw text or Markdown and
 * flatten such a tree into a list of {@link TextChunk}s, plus the plain data holders
 * (settings, inputs, request, response) used around a structured extraction.
 */
public class StructuredExtractor {
    /**
     * Builds a section tree from Markdown, using headings as section boundaries.
     *
     * <p>Only the direct children of the parsed document are visited. A heading whose level is
     * at most {@code maxSectionDepth} opens a new section; it is attached to the closest
     * still-open section with a strictly smaller level (or to the root document). Every other
     * top-level node, including headings deeper than {@code maxSectionDepth}, is accumulated as
     * raw text and attached as a single text item to the section that was open when the text
     * was encountered.
     *
     * @param rawMarkdown     the Markdown source to parse
     * @param maxSectionDepth deepest heading level that still creates a section; when zero or
     *                        negative no sectioning is performed and the whole input is returned
     *                        as one text item under a document root
     * @return the root document of the extracted structure
     */
    public static StructuredContent runMarkdownStructuredExtraction(String rawMarkdown, int maxSectionDepth) {
        if (maxSectionDepth <= 0) {
            // No heading can open a section: same result shape as the plain-text extraction.
            return StructuredExtractor.runTxtStructuredExtraction(rawMarkdown);
        }
        Parser parser = Parser.builder().build();
        Document document = parser.parse(rawMarkdown);

        StructuredContent.Document root = new StructuredContent.Document();
        // Chain of currently open sections, innermost on top. The root sits at the bottom and is
        // never popped, so every section created below is reachable from the returned tree.
        ArrayDeque<StructuredContent.Section> openSections = new ArrayDeque<StructuredContent.Section>();
        openSections.push(root);
        StringBuilder pendingText = new StringBuilder();

        for (Node node : document.getChildren()) {
            if (node instanceof Heading) {
                Heading heading = (Heading) node;
                int level = heading.getLevel();
                if (level > maxSectionDepth) {
                    // Too deep to become a section: keep the heading line as ordinary text.
                    pendingText.append(node.getChars()).append("\n");
                    continue;
                }
                // Text seen so far belongs to the section that was open before this heading.
                StructuredExtractor.flushPendingText(openSections.peek(), pendingText);
                // Close every open section that is not a strict ancestor of the new heading.
                while (openSections.size() > 1 && openSections.peek().level >= level) {
                    openSections.pop();
                }
                // NOTE(review): the title is the heading's full source text from getChars(),
                // which presumably still contains the Markdown heading markers - confirm that
                // this is what consumers of the outline expect.
                StructuredContent.Section section = new StructuredContent.Section(level, heading.getChars().toString());
                openSections.peek().addChild(section);
                openSections.push(section);
                continue;
            }
            if (node instanceof SoftLineBreak || node instanceof HardLineBreak) {
                // NOTE(review): line breaks are inline nodes and presumably never appear as
                // direct children of the document; branch kept to preserve existing behavior.
                pendingText.append("\n");
                continue;
            }
            pendingText.append(node.getChars());
        }
        StructuredExtractor.flushPendingText(openSections.peek(), pendingText);
        return root;
    }

    /**
     * Attaches the buffered text (if any) to {@code target} as a text item and clears the buffer.
     */
    private static void flushPendingText(StructuredContent.Section target, StringBuilder buffer) {
        if (buffer.length() > 0) {
            target.addChild(new StructuredContent.Text(buffer.toString()));
            buffer.setLength(0);
        }
    }

    /**
     * Flattens a structured content tree into text chunks, in depth-first order.
     *
     * @param structure root of the tree to flatten; {@code null} yields an empty list
     * @return one chunk per text, image or table item found in the tree
     * @throws IllegalArgumentException if an item reports a type other than text, image, table,
     *                                  section or document
     */
    public static List<TextChunk> getFlatTextChunkListFromStructured(StructuredContent structure) {
        return StructuredExtractor.flattenTreeUsingDFS(structure, Collections.emptyList());
    }

    /**
     * Recursive depth-first flattening step.
     *
     * @param item           node to flatten; {@code null} yields an empty list
     * @param currentOutline titles of the sections enclosing {@code item}, outermost first;
     *                       never modified by this method
     * @return the chunks produced by {@code item} and its descendants
     */
    private static List<TextChunk> flattenTreeUsingDFS(StructuredContent item, List<String> currentOutline) {
        if (item == null) {
            return Collections.emptyList();
        }
        List<String> childOutline = currentOutline;
        switch (item.getType()) {
            case "text": {
                return Collections.singletonList(TextChunk.build(null, ((StructuredContent.Text) item).text, currentOutline));
            }
            case "image": {
                return Collections.singletonList(TextChunk.build(null, ((StructuredContent.Image) item).description, currentOutline));
            }
            case "table": {
                return Collections.singletonList(TextChunk.build(null, ((StructuredContent.Table) item).text, currentOutline));
            }
            case "section": {
                // Children of a section see its title appended to the outline. Work on a copy so
                // the caller's list (possibly immutable) is left untouched.
                childOutline = new ArrayList<String>(currentOutline);
                childOutline.add(((StructuredContent.Section) item).title);
                // Intentional fall-through: a section's children are walked like a document's.
            }
            case "document": {
                if (item.content == null) {
                    return Collections.emptyList();
                }
                List<TextChunk> chunks = new ArrayList<TextChunk>();
                for (StructuredContent child : item.content) {
                    chunks.addAll(StructuredExtractor.flattenTreeUsingDFS(child, childOutline));
                }
                return chunks;
            }
            default: {
                throw new IllegalArgumentException("Unsupported structured content type: " + item.getType());
            }
        }
    }

    /**
     * Wraps plain text into a document containing a single text item.
     *
     * @param txt the text to wrap
     * @return a document whose only child is a text item holding {@code txt}
     */
    public static StructuredContent runTxtStructuredExtraction(String txt) {
        StructuredContent.Document doc = new StructuredContent.Document();
        doc.addChild(new StructuredContent.Text(txt));
        return doc;
    }

    /** Settings of a structured extraction. */
    public static class StructuredExtractorSettings {
        /** Defaults to 6. */
        public int maxSectionDepth = 6;
        /** Defaults to {@link ImageHandlingMode#IGNORE}. */
        public ImageHandlingMode imageHandlingMode = ImageHandlingMode.IGNORE;
        /** Optional; {@code null} unless explicitly set. */
        public OCRSettings ocrSettings;

        /**
         * Returns a one-line summary of the settings. The OCR part is only included when
         * {@link #ocrSettings} is set. Unset (null) fields are rendered as {@code null} rather
         * than causing an exception.
         */
        @Override
        public String toString() {
            // Concatenating the enum values directly is null-safe; for non-null values it prints
            // the constant name, exactly as name() would.
            String s = "maxSectionDepth=" + this.maxSectionDepth + ", imageHandlingMode=" + this.imageHandlingMode;
            if (this.ocrSettings != null) {
                return s + ", ocrEngine=" + this.ocrSettings.ocrEngine + ", ocrLanguages=" + this.ocrSettings.ocrLanguages;
            }
            return s;
        }
    }

    /** Possible values of {@link StructuredExtractorSettings#imageHandlingMode}. */
    public static enum ImageHandlingMode {
        OCR,
        IGNORE;

    }

    /** OCR-related settings, referenced from {@link StructuredExtractorSettings#ocrSettings}. */
    public static class OCRSettings {
        /** No default value: {@code null} unless explicitly set. */
        public OCREngine ocrEngine;
        /** Defaults to {@code "en"}. */
        public String ocrLanguages = "en";

        /** Possible values of {@link OCRSettings#ocrEngine}. */
        public static enum OCREngine {
            EASYOCR,
            TESSERACT,
            AUTO;

        }
    }

    /** Inputs of a structured extraction request. */
    public static class StructuredExtractorInputs {
        /** Reference to the document to extract. */
        public InputRefs.DocumentRef document;
    }

    /** Outcome of a structured extraction: either the extracted content or an error message. */
    public static class StructuredExtractionResponseOrError {
        /** {@code true} when built by {@link #fromSuccess}, {@code false} when built by {@link #fromError}. */
        public boolean ok;
        /** Set by {@link #fromSuccess}; left {@code null} by {@link #fromError}. */
        public StructuredContent content;
        /** Set by {@link #fromError}; left {@code null} by {@link #fromSuccess}. */
        public String errorMessage;

        /**
         * Builds a successful response.
         *
         * @param response the extracted content
         * @return a response with {@code ok == true} carrying {@code response}
         */
        public static StructuredExtractionResponseOrError fromSuccess(StructuredContent response) {
            StructuredExtractionResponseOrError resp = new StructuredExtractionResponseOrError();
            resp.ok = true;
            resp.content = response;
            return resp;
        }

        /**
         * Builds a failed response.
         *
         * @param e the failure to report
         * @return a response with {@code ok == false} whose message is produced by
         *         {@code ExceptionUtils.getMessageWithCauses(e)}
         */
        public static StructuredExtractionResponseOrError fromError(Throwable e) {
            StructuredExtractionResponseOrError resp = new StructuredExtractionResponseOrError();
            resp.ok = false;
            resp.errorMessage = ExceptionUtils.getMessageWithCauses(e);
            return resp;
        }
    }

    /** A structured extraction request: the inputs to process and the settings to apply. */
    public static class StructuredExtractorRequest {
        public StructuredExtractorInputs inputs = new StructuredExtractorInputs();
        public StructuredExtractorSettings settings = new StructuredExtractorSettings();

        /** Creates a request with empty inputs and default settings. */
        public StructuredExtractorRequest() {
        }

        /**
         * Creates a request for the given document.
         *
         * @param document           reference stored in {@code inputs.document}
         * @param structuredSettings stored as-is in {@code settings}, replacing the defaults
         *                           (including when it is {@code null})
         */
        public StructuredExtractorRequest(InputRefs.DocumentRef document, StructuredExtractorSettings structuredSettings) {
            this.inputs.document = document;
            this.settings = structuredSettings;
        }
    }
}

