import logging
import pandas as pd
from pandas.tseries.offsets import BDay, Milli, YearEnd, MonthEnd, QuarterEnd, Week, BusinessDay, Day, Hour, Minute, Second
from dataiku.core import doctor_constants
from dataiku.doctor.timeseries.utils import set_day_within_month

logger = logging.getLogger(__name__)


class PandasFrequencyString:
    """Pair of pandas frequency descriptors associated with one Java time unit.

    Args:
        pandas_alias (str): pandas offset alias, usable as the `freq` of pd.date_range or by the gluonts models
                            (https://pandas.pydata.org/pandas-docs/version/1.0.5/user_guide/timeseries.html#offset-aliases)
        pandas_offset (str, optional): name of the pd.DateOffset keyword argument used to push the end of a date range
                                       one extra time step forward, for time units that cannot be rounded on their own
                                       alias (see generate_date_range). Left to None for the other time units.
                                       (https://pandas.pydata.org/pandas-docs/version/1.0.5/reference/api/pandas.tseries.offsets.DateOffset.html)
    """

    def __init__(self, pandas_alias, pandas_offset=None):
        # Plain value holder: both descriptors are stored untouched
        self.pandas_alias, self.pandas_offset = pandas_alias, pandas_offset


# Java time unit -> pandas frequency descriptors
PANDAS_FREQUENCY_STRINGS = {
    # Units carrying a pandas_offset: generate_date_range rounds them to the day and offsets the end of the range
    "YEAR": PandasFrequencyString(pandas_alias=YearEnd._prefix, pandas_offset="years"),
    # half years share the monthly alias, the time step is multiplied by 6 in _get_pandas_time_step
    "HALF_YEAR": PandasFrequencyString(pandas_alias=MonthEnd._prefix, pandas_offset="months"),
    "QUARTER": PandasFrequencyString(pandas_alias=QuarterEnd._prefix, pandas_offset="months"),
    "MONTH": PandasFrequencyString(pandas_alias=MonthEnd._prefix, pandas_offset="months"),
    "WEEK": PandasFrequencyString(pandas_alias=Week._prefix, pandas_offset="weeks"),
    "BUSINESS_DAY": PandasFrequencyString(pandas_alias=BusinessDay._prefix, pandas_offset="days"),
    # Units without pandas_offset: generate_date_range rounds them directly on their own alias
    "DAY": PandasFrequencyString(pandas_alias=Day._prefix),
    "HOUR": PandasFrequencyString(pandas_alias=Hour._prefix),
    "MINUTE": PandasFrequencyString(pandas_alias=Minute._prefix),
    "SECOND": PandasFrequencyString(pandas_alias=Second._prefix),
    "MILLISECOND": PandasFrequencyString(pandas_alias=Milli._prefix),
}


# Day suffixes looked up by get_time_unit_end_of_week with (Java day of week index - 1), so index 1 maps to "SUN"
ORDERED_CALENDAR_DAYS = ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
# Month suffixes looked up by _get_pandas_frequency with (unit_alignment - 1) to anchor quarterly and yearly frequencies
MONTHS_SUFFIXES = ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]


def get_time_unit_end_of_week(end_of_week_day_index):
    """Translate the Java day of week index into the end of week suffix used by pandas.

    The index is 1-based: 1 maps to "SUN" and 7 maps to "SAT".
    Note that 0 is not rejected: through Python negative indexing it also maps to "SAT".
    """
    zero_based_index = end_of_week_day_index - 1
    return ORDERED_CALENDAR_DAYS[zero_based_index]


def get_frequency(core_params):
    """Build the pandas frequency string described by the time step params of the MLTask core_params.

    The number of time units, the time unit and the end of week day are mandatory entries of the time step params
    (the end of week day is read whatever the time unit); the unit alignment is optional and defaults to None.
    """
    params = core_params[doctor_constants.TIME_STEP_PARAMS]

    return _get_pandas_frequency(
        params[doctor_constants.NUMBER_OF_TIME_UNITS],
        params[doctor_constants.TIME_UNIT],
        get_time_unit_end_of_week(params[doctor_constants.END_OF_WEEK_DAY]),
        params.get(doctor_constants.UNIT_ALIGNMENT),
    )


def get_monthly_day_alignment(core_params):
    """Return the monthly alignment stored in the time step params when the time unit is month-based, None otherwise.

    None is also returned when the time unit is month-based but no monthly alignment is set.
    """
    time_step_params = core_params[doctor_constants.TIME_STEP_PARAMS]

    if not supports_monthly_alignment(time_step_params.get(doctor_constants.TIME_UNIT)):
        return None
    return time_step_params.get(doctor_constants.MONTHLY_ALIGNMENT)


def supports_monthly_alignment(time_unit):
    """Tell whether the Java time unit is month-based ("MONTH", "QUARTER", "HALF_YEAR" or "YEAR")"""
    month_based_units = ("MONTH", "QUARTER", "HALF_YEAR", "YEAR")
    return time_unit in month_based_units


def _get_pandas_time_step(time_step, time_unit):
    """Convert the Java time step into the number of pandas periods to use for this time unit.

    Half years are expressed as a monthly frequency in pandas (6M), hence their time step is multiplied by 6.
    Every other time unit keeps its time step unchanged.
    """
    return 6 * time_step if time_unit == "HALF_YEAR" else time_step


def _get_pandas_frequency(time_step, time_unit, time_unit_end_of_week, unit_alignment):
    """Assemble the pandas frequency string "<time step><alias>[-<anchor>]" from the Java time step settings.

    Args:
        time_step: Java number of time units per step
        time_unit (str): Java time unit, must be a key of PANDAS_FREQUENCY_STRINGS
        time_unit_end_of_week (str): day suffix anchoring the frequency, only used for weeks
        unit_alignment (int, optional): 1-based index of the month anchoring the frequency, only used for quarters
                                        and years. When None, quarters are anchored on MAR and years on DEC.
    """
    alias = PANDAS_FREQUENCY_STRINGS[time_unit].pandas_alias

    if time_unit == "WEEK":
        alias = "{}-{}".format(alias, time_unit_end_of_week)
    elif time_unit in ("QUARTER", "YEAR"):
        # quarters and years only differ by the month used when no alignment is requested
        default_month_index = 3 if time_unit == "QUARTER" else 12
        month_index = default_month_index if unit_alignment is None else unit_alignment
        alias += "-" + MONTHS_SUFFIXES[month_index - 1]

    return "{}{}".format(_get_pandas_time_step(time_step, time_unit), alias)


def _shift_date_month_within_half_year(date, unit_alignment):
    """Move the date forward to the closest month matching the alignment within a half year.

    Args:
        date: date to shift, it must expose a `month` attribute and support the addition of a pd.DateOffset
        unit_alignment (int, optional): position of the target month within the half year (1 for January/July,
                                        6 for June/December). Any falsy value (None, 0) means 6.

    Returns:
        The date itself when its month already matches the alignment, otherwise the date shifted forward by whole months.
    """
    # position of the month within its own half year, between 1 and 6
    month_in_half_year = date.month - 6 if date.month > 6 else date.month
    target_month = unit_alignment or 6

    months_forward = target_month - month_in_half_year
    if months_forward == 0:
        return date
    if months_forward < 0:
        # target already passed in this half year: go to the next one (for instance March to July)
        months_forward += 6
    # otherwise the target is still ahead in the same half year (for instance February to May)
    return date + pd.DateOffset(months=months_forward)


def generate_date_range(start_time, end_time, time_step, time_unit, time_unit_end_of_week, unit_alignment, monthly_alignment=None):
    """Generate the dates of the time steps between start_time and end_time.

    Args:
        start_time: first date to cover, it must support `round` and date offset arithmetic (presumably a pd.Timestamp)
        end_time: last date to cover, same requirements as start_time
        time_step: Java number of time units per step
        time_unit (str): Java time unit, must be a key of PANDAS_FREQUENCY_STRINGS
        time_unit_end_of_week (str): day suffix anchoring weekly frequencies
        unit_alignment (int, optional): 1-based month index anchoring quarterly, half-yearly and yearly frequencies
        monthly_alignment (optional): when not None and the time unit is month-based, the generated dates are passed
                                      to set_day_within_month with this value

    Returns:
        The result of pd.date_range (a pd.DatetimeIndex), or the output of set_day_within_month applied to it
        when a monthly alignment applies.
    """
    frequency_strings = PANDAS_FREQUENCY_STRINGS[time_unit]
    offset_name = frequency_strings.pandas_offset
    needs_end_offset = offset_name is not None

    if time_unit == "HALF_YEAR":
        # half years rely on generic 6 months periods, so both bounds are moved manually onto the requested end month
        start_time = _shift_date_month_within_half_year(start_time, unit_alignment)
        end_time = _shift_date_month_within_half_year(end_time, unit_alignment)

    # business day, week, month, quarter, half year and year are rounded to the closest day,
    # the other time units are rounded on their own alias
    if needs_end_offset:
        rounding_unit = Day._prefix
    else:
        rounding_unit = frequency_strings.pandas_alias
    range_start = start_time.round(rounding_unit)
    range_end = end_time.round(rounding_unit)

    if needs_end_offset:
        # pd.date_range leaves the end bound out for these time units unless it sits exactly at the end of a period,
        # so the end bound is moved to fall between the last time step and the following one
        if time_unit == "BUSINESS_DAY":
            # Day() - BDay() keeps a business day unchanged and turns any other day into the previous business day,
            # so that a range ending on a week-end stops on the Friday before
            range_end = range_end + Day() - BDay()
        else:
            periods = _get_pandas_time_step(time_step, time_unit)
            if time_unit == "QUARTER":
                # the quarter offset is expressed in months
                periods = 3 * periods
            # one Day is removed so that the following time stamp is not included
            # when the end bound is exactly at the end of a period
            range_end = range_end + pd.DateOffset(**{offset_name: periods}) - Day()

    all_dates = pd.date_range(
        start=range_start,
        end=range_end,
        freq=_get_pandas_frequency(time_step, time_unit, time_unit_end_of_week, unit_alignment),
    )

    if monthly_alignment is None or not supports_monthly_alignment(time_unit):
        return all_dates
    return set_day_within_month(all_dates, monthly_alignment)
