/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.llm.governance.forbiddenterms;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamColumn;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.llm.governance.forbiddenterms.ForbiddenTermsDetectionGuardrail;
import com.dataiku.dip.llm.governance.forbiddenterms.IForbiddenTermsService;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.utils.DKULogger;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Backend implementation of {@link IForbiddenTermsService}: builds the set of
 * forbidden terms used by a {@link ForbiddenTermsDetectionGuardrail} by reading
 * one column of a dataset.
 */
@Service
public class BackendForbiddenTermsService
implements IForbiddenTermsService {
    @Autowired
    private TransactionService transactionService;
    @Autowired
    private DatasetsDAO datasetsDAO;
    private static final DKULogger logger = DKULogger.getLogger("dku.llm.governance.forbiddenterms.service");

    /**
     * Collects the forbidden terms referenced by the given guardrail settings.
     *
     * <p>The dataset identified by {@code settings.datasetProject} and
     * {@code settings.datasetName} is looked up inside a read transaction, then
     * pushed through {@link UniversalSingleThreadPusher}; for every row handed to
     * the output, the value of the {@code settings.datasetColumn} column is
     * lower-cased with {@link Locale#ENGLISH} and added to the result. Rows whose
     * value is {@code null} are skipped; empty or whitespace-only values are kept
     * as they are.
     *
     * <p>NOTE(review): the {@code _NoCheck} suffix suggests no permission check is
     * performed here and that callers are expected to have authorized access
     * beforehand - confirm against the interface contract.
     *
     * <p>NOTE(review): {@code settings.datasetColumn} is not validated; the
     * behaviour for a blank or unknown column name depends on
     * {@code StreamColumnFactory.column} and on the row implementation.
     *
     * @param authCtx  authentication context forwarded to the dataset pusher
     * @param settings guardrail settings naming the project, dataset and column to read
     * @return a mutable set of the distinct lower-cased terms found in the column
     * @throws IllegalArgumentException if the dataset project key or dataset name is blank
     * @throws Exception if the dataset lookup or the row streaming fails
     */
    @Override
    public Set<String> getForbiddenTerms_NoCheck(AuthCtx authCtx, ForbiddenTermsDetectionGuardrail.Params settings) throws Exception {
        // Fail fast on incomplete settings, before a transaction is opened or joined.
        if (StringUtils.isBlank(settings.datasetProject) || StringUtils.isBlank(settings.datasetName)) {
            throw new IllegalArgumentException("This LLM connection uses a Forbidden terms detector that is missing a dataset in its settings");
        }

        // Only the dataset definition is resolved inside the transaction; the
        // rows are read after it has been closed.
        final Dataset dataset;
        try (Transaction t = this.transactionService.retrieveOrBeginRead()) {
            SerializedDataset sd = (SerializedDataset)this.datasetsDAO.getMandatory(settings.datasetProject, settings.datasetName);
            dataset = Dataset.fromSerialized(sd);
        }

        logger.info((Object)("Collecting forbidden terms from " + dataset.getFullName()));

        final Set<String> forbiddenTerms = new HashSet<>();
        StreamColumnFactory scf = new StreamColumnFactory();
        final Column termColumn = scf.column(settings.datasetColumn);
        StreamRowFactory srf = new StreamRowFactory();

        UniversalSingleThreadPusher.push(authCtx, dataset, new ProcessorOutput(){

            public void emitRow(Row row) throws Exception {
                String v = row.get(termColumn);
                if (v != null) {
                    // Terms are stored lower-cased (English locale rules).
                    forbiddenTerms.add(v.toLowerCase(Locale.ENGLISH));
                }
            }

            public void lastRowEmitted() throws Exception {
                // Nothing to finalize: terms are accumulated row by row.
            }

            public void cancel() throws Exception {
                // No resources to release.
            }

            public void setMaxMemoryUsed(long size) {
                // Memory accounting is not used by this collector.
            }
        }, (ColumnFactory)scf, (RowFactory)srf);

        logger.info((Object)("Collected " + forbiddenTerms.size() + " forbidden terms"));
        return forbiddenTerms;
    }
}

