from dataiku.eda.types import Literal

from scipy.stats import entropy
import numpy as np

from dataiku.eda.computations.computation import UnivariateComputation
from dataiku.eda.computations.context import Context
from dataiku.eda.computations.immutable_data_frame import ImmutableDataFrame
from dataiku.eda.exceptions import NoDataError
from dataiku.eda.types import EntropyModel, EntropyResultModel


class Entropy(UnivariateComputation):
    """Univariate computation producing the base-2 entropy of a text column.

    The column is read through ``ImmutableDataFrame.text_col``; the entropy is
    computed from the relative frequency of each distinct code in the column.
    """

    @staticmethod
    def get_type() -> Literal["entropy"]:
        """Return the type tag of this computation (``"entropy"``)."""
        return "entropy"

    @staticmethod
    def build(params: EntropyModel) -> 'Entropy':
        """Instantiate the computation from ``params``, using its ``column`` entry."""
        column = params['column']
        return Entropy(column)

    def apply(self, idf: ImmutableDataFrame, ctx: Context) -> EntropyResultModel:
        """Compute the entropy (in bits) of ``self.column`` within ``idf``.

        :param idf: data frame holding the column to analyse.
        :param ctx: computation context (not used by this computation).
        :return: a dict with the computation ``type`` and the entropy ``value``.
        :raises NoDataError: if the column contains no rows.
        """
        values = idf.text_col(self.column)
        n_rows = len(values)

        # Entropy is undefined without observations.
        if n_rows == 0:
            raise NoDataError()

        # Only the per-code counts matter; the distinct codes are discarded.
        # NOTE(review): if ``codes`` uses a sentinel (e.g. -1) for missing
        # values, those rows count as a category of their own - confirm intent.
        _, occurrences = np.unique(values.codes, return_counts=True)
        probabilities = occurrences.astype(float) / n_rows
        bits = entropy(probabilities, base=2)

        return {
            "type": self.get_type(),
            "value": bits
        }
