import numpy as np
import pandas as pd

from dataiku.eda.computations.computation import Computation
from dataiku.eda.exceptions import DegenerateCaseError


class TimeSeriesComputation(Computation):
    """
    Base for computations working on a series column paired with a time column.

    This class only stores the two column names and offers a helper to extract
    the time-ordered data; apply() has to be provided by subclasses.
    """

    def __init__(self, series_column, time_column):
        """
        :param series_column: name of the column holding the series values
        :param time_column: name of the column holding the timestamps
        """
        self.time_column = time_column
        self.series_column = series_column

    def describe(self):
        """
        Builds a short textual description: class name and both column names.

        :rtype: str
        """
        template = "{}(series_column={}, time_column={})"
        return template.format(self.__class__.__name__, self.series_column, self.time_column)

    def apply(self, idf, ctx):
        """
        Not implemented at this level (always raises NotImplementedError).
        """
        raise NotImplementedError

    def _get_time_series(self, idf):
        """
        Extracts the series values and their timestamps from the data frame,
        both reordered by ascending timestamp.

        :param idf: the immutable data frame
        :type idf: ImmutableDataFrame

        :return: the couple of (series, timestamps) by ascending order of timestamps
        :rtype: (np.ndarray, np.ndarray)

        :raises DegenerateCaseError: if the series holds a non-finite value
                                     (NaN or infinite) or if any timestamp is null
        """
        values = idf.float_col(self.series_column)
        instants = idf.date_col(self.time_column)

        # The series is validated before the timestamps, so a series problem
        # is the one reported when both columns are invalid.
        has_only_finite_values = np.all(np.isfinite(values))
        if not has_only_finite_values:
            raise DegenerateCaseError("The series has missing values")

        if np.any(pd.isnull(instants)):
            raise DegenerateCaseError("The timestamps have invalid values (not a time)")

        # NOTE(review): argsort is used with its default kind, which is not a
        # stable sort: rows sharing the same timestamp have no guaranteed
        # relative order - confirm whether duplicated timestamps can occur.
        order = np.argsort(instants)
        ordered_values = values[order]
        ordered_instants = instants[order]
        return ordered_values, ordered_instants


def format_iso8601(ts):
    """
    Formats the provided timestamp as an iso8601 compliant UTC string.
    Only supports instances of pd.Timestamp (throws otherwise).

    - Timezone naive timestamps are considered to represent UTC instants.
    - Timezone aware timestamps are converted to UTC before being formatted,
      so the result denotes the same instant whatever the original timezone.

    Uses the "Z" notation rather than "+00:00"

    :param ts: the timestamp to format
    :type ts: pd.Timestamp

    :return: the iso8601 representation of the instant, suffixed with "Z"
    :rtype: str

    :raises ValueError: if ts is not an instance of pd.Timestamp
    """
    if not isinstance(ts, pd.Timestamp):
        raise ValueError("Can only format instances of pd.Timestamp, got {}".format(type(ts)))

    if ts.tzinfo is not None:
        # Shift to UTC first: merely dropping the timezone keeps the local
        # wall clock time, which the "Z" suffix would then mislabel as UTC.
        ts = ts.tz_convert("UTC")

    # Drop the (now UTC) timezone so that isoformat() emits no "+00:00" offset
    return ts.tz_localize(None).isoformat() + "Z"
