# Core code imports
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import numpy as np

# Import from pandas_utils.pandas_aggregations
from bs_commons.dku_utils.pandas_utils.pandas_aggregations import compute_column_lag_or_leads

# Import from dku_utils.projects.project_commons
from bs_commons.dku_utils.projects.project_commons import get_current_project_and_variables

# Recipe overview: read two monthly segment datasets, left-merge them on the
# key columns below, apply `compute_column_lag_or_leads` to every non-key
# column, and write the result to "monthly_segments_information_lagged".

# Project handle and variables; only the "standard" variables are used here.
project, variables = get_current_project_and_variables()
app_variables = variables["standard"]

# Columns used as merge keys. They are also skipped by the lag loop.
KEY_COLUMNS = ["previous_rfm_reference_month_start", "previous_segment_label"]
# Additional columns to skip in the lag loop (currently none).
COLUMNS_TO_REJECT_FROM_LAGS = []

# Read recipe inputs
retention_df = dataiku.Dataset("monthly_segment_retention").get_dataframe()
next_segments_df = dataiku.Dataset("monthly_most_likely_next_segments").get_dataframe()

# Left merge: every retention row is kept, whether or not a matching
# next-segment row exists for its key columns.
all_monthly_segments_information_df = retention_df.merge(
    next_segments_df, how="left", on=KEY_COLUMNS
)

# Lag value comes from the project's standard variables.
# NOTE(review): presumably a number of months, going by the variable name —
# confirm against the project variables and `compute_column_lag_or_leads`.
lag_in_months = app_variables["months_interval_between_rfm_transitions_app"]

# Decide which columns to process before the loop starts, so the selection
# is based on the merged columns only and not on anything the helper returns.
excluded_columns = KEY_COLUMNS + COLUMNS_TO_REJECT_FROM_LAGS
columns_to_lag = [
    candidate
    for candidate in all_monthly_segments_information_df.columns
    if candidate not in excluded_columns
]

# One helper call per selected column: partitioned by segment label and
# ordered by ascending reference month start. The dataframe is replaced by
# the helper's return value at each step.
for lagged_column in columns_to_lag:
    all_monthly_segments_information_df = compute_column_lag_or_leads(
        all_monthly_segments_information_df,
        target_column=lagged_column,
        partitioning_columns=["previous_segment_label"],
        order_columns=["previous_rfm_reference_month_start"],
        order_columns_to_sort_ascending=["previous_rfm_reference_month_start"],
        lag_values_to_retrieve=[lag_in_months],
    )

# Write recipe outputs
dataiku.Dataset("monthly_segments_information_lagged").write_with_schema(
    all_monthly_segments_information_df
)
