/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.expr;

import com.dataiku.dip.i18n.TranslationService;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.expr.TokenizedText;
import com.dataiku.dip.shaker.processors.transform.TextSimplifierAlgorithm;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.text.Labelled;
import com.dataiku.dip.util.ParamDesc;

/**
 * Front for {@link TextSimplifierAlgorithm}: registers the simplifier's
 * parameters on a {@link ProcessorDesc} and forwards simplification requests
 * to the wrapped algorithm.
 */
public class TextSimplifier {
    /**
     * Identifiers offered by the "language" select. The same array is handed
     * to {@code ParamDesc.advancedSelect} twice (presumably once as values and
     * once as display labels - confirm against ParamDesc).
     */
    static String[] choices = {
        "afrikaans", "albanian", "arabic", "armenian",
        "basque", "bengali", "bulgarian",
        "catalan", "chinese", "chinese_traditional", "croatian", "czech",
        "danish", "dutch", "dutch_2021",
        "english", "english_2021", "estonian",
        "finnish", "french", "french_2021",
        "german", "german_2021", "greek", "gujarati",
        "hebrew", "hindi", "hungarian",
        "icelandic", "indonesian", "irish", "italian", "italian_2021",
        "japanese",
        "kannada", "korean",
        "latvian", "lithuanian", "luxembourgish",
        "macedonian", "malayalam", "marathi",
        "nepali", "norwegian",
        "persian", "polish", "portuguese", "portuguese_2021",
        "romanian", "russian",
        "sanskrit", "serbian", "sinhala", "slovak", "slovenian", "spanish", "spanish_2021", "swedish",
        "tagalog", "tamil", "tatar", "telugu", "thai", "turkish",
        "ukrainian", "urdu",
        "vietnamese",
        "yoruba"
    };

    /** Algorithm instance every {@link #simplify(TokenizedText)} call is delegated to. */
    private final TextSimplifierAlgorithm tsa;

    /**
     * Returns the help text describing the four simplification options
     * (normalize, stem, clear stop words, sort alphabetically).
     *
     * @return a bullet list, one newline-terminated line per option
     */
    public static String getHelp() {
        // Adjacent literals are folded at compile time; the resulting string is
        // the same single constant as before, just readable line by line.
        return "* Normalize text: transform to lowercase, remove punctuation & accents and perform Unicode NFD normalization (Caf\u00e9 -> cafe)\n"
            + "* Stem words: transform each word into its 'stem', i.e. its grammatical root. For example, 'grammatical' is transformed to 'grammat'. This transformation is language-specific and requires you to enter the language of your column.\n"
            + "* Clear stop words: remove so-called 'stop words' (the, I, a, of, ...). This transformation is language-specific and requires you to enter the language of your column.\n"
            + "* Sort words alphabetically: sorts all words of the text. For example, 'the small dog' is transformed to 'dog small the'. This allows you to match together strings that are written with the same words in a different order.\n";
    }

    /**
     * Registers the simplifier's parameters on the given processor description.
     *
     * <p>Always registered: the booleans {@code normalize}, {@code stem},
     * {@code clearStopWords}, {@code sortAlphabetically} and the non-mandatory
     * {@code language} select. When {@code operationSelector} is set, the
     * {@code operation} select (default {@link OperationType#TO_JSON}) and the
     * mandatory {@code prefix} string are registered as well.
     *
     * @param language          language passed to the translation service for the
     *                          parameter labels (not the language of the text)
     * @param pd                processor description receiving the parameters
     * @param operationSelector whether to also register {@code operation} and {@code prefix}
     */
    public static void withParams(String language, ProcessorDesc pd, boolean operationSelector) {
        // Resolve the translation service once and reuse it for every label below.
        final TranslationService translator = (TranslationService)SpringUtils.getBean(TranslationService.class);

        pd.withBool(
            "normalize",
            label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.NORMALIZE", "Normalize text"));
        pd.withBool(
            "stem",
            label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.STEM", "Stem words"));
        pd.withBool(
            "clearStopWords",
            label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.CLEAR_STOP_WORDS", "Clear stop words"));
        pd.withBool(
            "sortAlphabetically",
            label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.SORT_ALPHABETICALLY", "Sort words alphabetically"));

        // NOTE(review): the description text below is passed as a plain literal, not through the translator.
        pd.withParam(
            ParamDesc.advancedSelect(
                    "language",
                    label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.LANGUAGE", "Language"),
                    "Language of the text (used with stemming & stop words)",
                    choices,
                    choices)
                .withMandatory(false));

        if (!operationSelector) {
            return;
        }

        // NOTE(review): "Storage of tokens" is likewise an untranslated literal.
        pd.withParam(
            ParamDesc.advancedSelect(
                    "operation",
                    label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.OPERATION", "Operation"),
                    "Storage of tokens",
                    OperationType.class,
                    language)
                .withDefaultValue(OperationType.TO_JSON));
        pd.withMandSParam(
            "prefix",
            label(translator, language, "SHAKER.PROCESSOR.TextSimplifier.DESCRIPTION.PREFIX", "Prefix of columns (unfold)"));
    }

    /**
     * Builds the underlying {@link TextSimplifierAlgorithm} from the step parameters.
     *
     * @param params step parameters; handed to the algorithm as its own parameter type
     * @throws Exception declared by the original signature; whatever the algorithm's
     *                   constructor throws propagates unchanged
     */
    public TextSimplifier(Parameter params) throws Exception {
        // Typed local keeps the argument's static type at TextSimplifierAlgorithm.Parameter.
        final TextSimplifierAlgorithm.Parameter algorithmParams = params;
        this.tsa = new TextSimplifierAlgorithm(algorithmParams);
    }

    /**
     * Delegates to {@link TextSimplifierAlgorithm#simplify} on the wrapped algorithm.
     *
     * @param tokenized the tokenized text handed to the algorithm
     */
    public void simplify(TokenizedText tokenized) {
        this.tsa.simplify(tokenized);
    }

    /**
     * Looks up the {@link TranslationService} bean and translates one message with it.
     *
     * @param lang         language passed to the translation service
     * @param id           translation key
     * @param defaultValue default text passed along with the key
     * @return whatever the translation service returns for the key
     */
    private static String translate(String lang, String id, String defaultValue) {
        final TranslationService service = (TranslationService)SpringUtils.getBean(TranslationService.class);
        return label(service, lang, id, defaultValue);
    }

    /**
     * Translates one message through an already-resolved service, with no
     * format arguments.
     */
    private static String label(TranslationService service, String lang, String key, String fallback) {
        return service.translate(lang, key, fallback, new Object[0]);
    }

    /**
     * Choices of the "operation" select (described there as "Storage of tokens").
     * Each constant carries the translation key and default text of its label.
     */
    public enum OperationType implements Labelled {
        /** Default label: "Convert to JSON". */
        TO_JSON("SHAKER.PROCESSOR.TextSimplifier.OperationType.TO_JSON", "Convert to JSON"),
        /** Default label: "One row per token". */
        FOLD("SHAKER.PROCESSOR.TextSimplifier.OperationType.FOLD", "One row per token"),
        /** Default label: "One column per token". */
        SPLIT("SHAKER.PROCESSOR.TextSimplifier.OperationType.SPLIT", "One column per token");

        private final String translationKey;
        private final String defaultLabel;

        OperationType(String translationKey, String defaultLabel) {
            this.translationKey = translationKey;
            this.defaultLabel = defaultLabel;
        }

        /** Returns the label requested for language {@code "en"}. */
        @Override
        public String getLabel() {
            return this.getLabel("en");
        }

        /**
         * Returns this constant's label, translated through the translation service.
         *
         * @param language language passed to the translation service
         */
        @Override
        public String getLabel(String language) {
            return TextSimplifier.translate(language, this.translationKey, this.defaultLabel);
        }
    }

    /**
     * Step parameters: everything inherited from
     * {@link TextSimplifierAlgorithm.Parameter} plus the two fields below,
     * whose names match the "operation" and "prefix" parameters registered by
     * {@link TextSimplifier#withParams}.
     */
    public static class Parameter
    extends TextSimplifierAlgorithm.Parameter
    implements StepParams {
        private static final long serialVersionUID = 1L;

        /** Selected operation; may be unset when no operation selector was offered (assumption - confirm with callers). */
        public OperationType operation;
        /** Column prefix; labelled "Prefix of columns (unfold)" where the parameter is registered. */
        public String prefix;

        /**
         * Intentionally empty: no constraint is checked here.
         *
         * @throws IllegalArgumentException never thrown by this implementation
         */
        public void validate() throws IllegalArgumentException {
        }
    }
}

