import numpy as np
import pandas as pd


def _boundary_activity_counts(workflow_df, start_end):
    """Count how often each activity is the first ('start') or last ('end') one of a case."""
    per_case = workflow_df.groupby("case")["activity"]
    boundary = per_case.first() if start_end == "start" else per_case.last()
    counts = boundary.reset_index().groupby("activity").size().reset_index()
    counts.columns = ["step", "frequency"]
    counts["start_end"] = start_end
    return counts


def _mine_frequency_view(workflow_df):
    """Build the Frequency View: transition counts and per-activity occurrence counts."""
    # Rows whose 'origin' is NaN (first event of a case) are dropped by groupby.
    dfg = (
        workflow_df[["origin", "activity"]]
        .groupby(["origin", "activity"])
        .size()
        .reset_index()
    )
    dfg.columns = ["source", "target", "weight"]

    activity_count = workflow_df.groupby("activity").size().reset_index()
    activity_count.columns = ["step", "frequency"]
    return dfg, activity_count


def _mine_time_view(workflow_df, end_time):
    """Build the Time View: mean transition durations and a time value per activity."""
    # Time elapsed between each event and the previous event of the same case.
    timed_df = workflow_df.assign(
        diff_time=workflow_df.groupby("case")["epoch_timestamp"].diff(1)
    )

    # - `weight`: mean 'diff_time' per transition (what the Time View shows).
    # - `frequency`: number of occurrences of the transition; only used as the
    #   weights of the weighted average below and dropped before returning.
    transitions = (
        timed_df[["origin", "activity", "diff_time"]]
        .groupby(["origin", "activity"])
        .agg(weight=("diff_time", "mean"), frequency=("activity", "count"))
        .reset_index()
    )
    transitions.columns = ["source", "target", "weight", "frequency"]

    if end_time is None:
        # No end timestamps: approximate the time spent in an activity by the
        # mean time to each following activity, weighted by how often that
        # transition occurs.
        def weighted_average(weights_of_group):
            return np.average(
                weights_of_group,
                weights=transitions.loc[weights_of_group.index, "frequency"],
            )

        activity_count = (
            transitions.groupby("source")
            .agg(weighted_time=("weight", weighted_average))
            .reset_index()
        )
        activity_count.columns = ["step", "frequency"]

        # An "END" step with a time of 0 is always appended.
        # NOTE(review): presumably "END" is a terminal marker activity that
        # never occurs as a transition source -- confirm against the caller.
        activity_count = pd.concat(
            [activity_count, pd.DataFrame({"step": ["END"], "frequency": [0]})]
        )
    else:
        # End timestamps are available, hence 'process_time' is available:
        # take its mean value per activity.
        activity_count = (
            timed_df.groupby("activity")["process_time"].mean().reset_index()
        )
        activity_count.columns = ["step", "frequency"]

    dfg = transitions.drop(["frequency"], axis=1)
    return dfg, activity_count


def mine_process(workflow_df, end_time=None, transition="frequency"):
    """
    Mines the process (directly-follows graph) from the given workflow DataFrame.

    The input DataFrame is not modified. Events are ordered by the 'sorting'
    column using a stable sort, so events sharing the same 'sorting' value keep
    their input order.

    Args:
        workflow_df (pd.DataFrame): Event log. Must contain the columns
            'sorting', 'case' and 'activity'. The Time View additionally reads
            'epoch_timestamp', and 'process_time' when `end_time` is given.
        end_time (optional): Only checked against None. When it is not None,
            the Time View uses the mean of 'process_time' per activity;
            otherwise it uses a frequency-weighted average of the outgoing
            transition times. Ignored in the Frequency View. Defaults to None.
        transition (str, optional): 'frequency' selects the Frequency View;
            any other value selects the Time View. Defaults to 'frequency'.
    Returns:
        dfg (pd.DataFrame): Columns ['source', 'target', 'weight']. `weight` is
            the number of occurrences of the transition (Frequency View) or the
            mean 'epoch_timestamp' difference of the transition (Time View).
        start_end_mined_df (pd.DataFrame): Columns ['step', 'frequency',
            'start_end']: how many cases start ('start') or end ('end') with
            each activity.
        activity_count (pd.DataFrame): Columns ['step', 'frequency']. Despite
            its name, `frequency` holds a time value in the Time View and an
            occurrence count in the Frequency View.
    """
    # 'mergesort' is stable: the default quicksort may reorder events that
    # share the same 'sorting' value, which would change the mined transitions.
    workflow_df = workflow_df.sort_values(by=["sorting"], kind="mergesort")

    start_end_mined_df = pd.concat(
        [
            _boundary_activity_counts(workflow_df, "start"),
            _boundary_activity_counts(workflow_df, "end"),
        ]
    )

    # Previous activity within the same case (NaN for the first event of a case).
    workflow_df["origin"] = workflow_df.groupby("case")["activity"].shift(1)

    if transition == "frequency":  # Frequency View
        dfg, activity_count = _mine_frequency_view(workflow_df)
    else:  # transition == "time" -> Time View
        dfg, activity_count = _mine_time_view(workflow_df, end_time)

    return dfg, start_end_mined_df, activity_count
