/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.docextraction;

import com.dataiku.dip.docextraction.Content;
import com.dataiku.dss.shadelib.org.apache.commons.codec.digest.DigestUtils;
import com.vladsch.flexmark.ast.HardLineBreak;
import com.vladsch.flexmark.ast.Heading;
import com.vladsch.flexmark.ast.SoftLineBreak;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.util.ArrayDeque;

/**
 * Static helpers used by document extraction: log-message formatting, page-number
 * padding, file hashing, and conversion of raw text/Markdown into a {@link Content} tree.
 */
public class DocExtractionUtils {
    /** Size in bytes of the read buffer used when hashing a file. */
    private static final int DIGEST_BUFFER_SIZE = 8192;

    /**
     * Prefixes a log message with the path of the document it relates to.
     *
     * @param sourceFilePath path of the document the message is about
     * @param message        the message text
     * @return the string {@code "[<sourceFilePath>] - <message>"}
     */
    public static String buildMessageLogForDocument(String sourceFilePath, String message) {
        return String.format("[%s] - %s", sourceFilePath, message);
    }

    /**
     * Formats a page number with the {@code %05d} pattern, i.e. zero-padded to a
     * minimum width of five characters.
     *
     * @param pageNumber the page number to format
     * @return the zero-padded decimal representation
     */
    public static String padPageNumber(int pageNumber) {
        return String.format("%05d", pageNumber);
    }

    /**
     * Feeds the full content of a file into a new SHA-1 {@link MessageDigest}.
     *
     * <p>The file is streamed through a fixed-size buffer rather than loaded in one
     * piece, so memory use does not grow with the file size. The digest is returned
     * <em>updated but not finalized</em>: the caller decides when to call
     * {@link MessageDigest#digest()}.
     *
     * @param file the file to hash
     * @return a SHA-1 digest that has been updated with every byte of {@code file}
     * @throws IOException if the file cannot be opened or read
     */
    public static MessageDigest computeDigestFromFile(File file) throws IOException {
        MessageDigest messageDigest = DigestUtils.getSha1Digest();
        byte[] buffer = new byte[DIGEST_BUFFER_SIZE];
        try (InputStream in = Files.newInputStream(file.toPath())) {
            int read;
            while ((read = in.read(buffer)) != -1) {
                messageDigest.update(buffer, 0, read);
            }
        }
        return messageDigest;
    }

    /**
     * Parses Markdown and groups its top-level nodes into nested sections driven by
     * the headings.
     *
     * <p>Headings whose level is at most {@code maxSectionDepth} open a new
     * {@link Content.Section}; it is attached to the closest enclosing section with a
     * strictly lower level (or to the document root). Deeper headings are not turned
     * into sections: their source text, followed by a newline, is kept as plain text.
     * The source text of all other top-level nodes is accumulated and attached as a
     * single {@link Content.Text} to the section that is open when the next section
     * starts, or at the end of the document.
     *
     * @param rawMarkdown     the Markdown source
     * @param maxSectionDepth deepest heading level that still creates a section;
     *                        {@code 0} disables parsing and returns the raw Markdown
     *                        as a single text item
     * @return the root {@link Content.Document} of the extracted tree
     */
    public static Content runMarkdownStructuredExtraction(String rawMarkdown, int maxSectionDepth) {
        if (maxSectionDepth == 0) {
            // No structure requested: same result as the plain-text extraction.
            return runTxtStructuredExtraction(rawMarkdown);
        }
        Parser parser = Parser.builder().build();
        Document document = parser.parse(rawMarkdown);

        Content.Document root = new Content.Document();
        // Stack of currently open sections, innermost on top. The root stays at the
        // bottom for the whole traversal so every new section always has a parent.
        ArrayDeque<Content.Section> stack = new ArrayDeque<>();
        stack.push(root);
        StringBuilder pendingText = new StringBuilder();

        for (Node node : document.getChildren()) {
            if (node instanceof Heading) {
                Heading heading = (Heading) node;
                int level = heading.getLevel();
                if (level > maxSectionDepth) {
                    // Too deep to become a section: keep the heading line as text.
                    pendingText.append(heading.getChars()).append("\n");
                    continue;
                }
                Content.Section section = new Content.Section(level, heading.getChars().toString());
                // Text seen so far belongs to the section that was open before this heading.
                flushPendingText(stack.peek(), pendingText);
                // Close every open section that is not an ancestor of the new one.
                // The root is never popped, otherwise the new section would be
                // attached to nothing and silently dropped from the result.
                while (stack.size() > 1 && stack.peek().level >= level) {
                    stack.pop();
                }
                stack.peek().addChild(section);
                stack.push(section);
            } else if (node instanceof SoftLineBreak || node instanceof HardLineBreak) {
                // NOTE(review): line breaks are inline nodes and presumably never appear
                // as direct children of the document; branch kept for safety - confirm.
                pendingText.append("\n");
            } else {
                pendingText.append(node.getChars());
            }
        }
        flushPendingText(stack.peek(), pendingText);
        return root;
    }

    /**
     * Wraps plain text into a document containing a single text item.
     *
     * @param txt the text content
     * @return a {@link Content.Document} whose only child is a {@link Content.Text}
     *         holding {@code txt}
     */
    public static Content runTxtStructuredExtraction(String txt) {
        Content.Document doc = new Content.Document();
        doc.addChild(new Content.Text(txt));
        return doc;
    }

    /** Attaches the buffered text (if any) to {@code target} as one text item and clears the buffer. */
    private static void flushPendingText(Content.Section target, StringBuilder buffer) {
        if (buffer.length() > 0) {
            target.addChild(new Content.Text(buffer.toString()));
            buffer.setLength(0);
        }
    }
}

