from dataiku.eda.types import Literal

import numpy as np
import pandas as pd

from dataiku.eda.computations.computation import UnivariateComputation
from dataiku.eda.computations.context import Context
from dataiku.eda.computations.immutable_data_frame import ImmutableDataFrame
from dataiku.eda.types import CountDistinctModel, CountDistinctResultModel


class CountDistinct(UnivariateComputation):
    """Univariate computation reporting how many distinct values a column holds."""

    @staticmethod
    def get_type() -> Literal["count_distinct"]:
        """Return the type tag identifying this computation."""
        return "count_distinct"

    @staticmethod
    def build(params: CountDistinctModel) -> 'CountDistinct':
        """Create a ``CountDistinct`` from its parameters, using the ``column`` entry."""
        column_name = params['column']
        return CountDistinct(column_name)

    def apply(self, idf: ImmutableDataFrame, ctx: Context) -> CountDistinctResultModel:
        """Count the distinct entries of ``self.column`` in ``idf``.

        The raw column is expected to be a ``pd.Categorical``, a
        ``np.ndarray`` or a ``pd.DatetimeIndex``.

        Returns:
            A dict holding the computation ``type`` and the distinct ``count``.

        Raises:
            ValueError: if the raw column is of any other container type.
        """
        raw = idf.raw_col(self.column)

        supported_containers = (pd.Categorical, np.ndarray, pd.DatetimeIndex)
        if not isinstance(raw, supported_containers):
            # Likely due to a bug or a change in ImmutableDataFrame
            raise ValueError("Unsupported series type: {}".format(type(raw)))

        # For categoricals, deduplicate the integer codes instead of the
        # category values themselves.
        # NOTE(review): pandas documents -1 as the code for missing entries, so
        # a missing value presumably counts as one distinct value - confirm.
        to_deduplicate = raw.codes if isinstance(raw, pd.Categorical) else raw
        uniques = pd.unique(to_deduplicate)

        return {"type": CountDistinct.get_type(), "count": len(uniques)}
