import dataiku
from dataiku.customrecipe import *
import pandas as pd
import itertools
import os
import json
from datetime import datetime
from feature_selection_ga import FeatureSelectionGA
import fitness_function

# IO: resolve the first dataset bound to the 'input_dataset' role and load it
# as a pandas dataframe.
input_dataset_name = get_input_names_for_role('input_dataset')[0]
input_dataset = dataiku.Dataset(input_dataset_name)
df = input_dataset.get_dataframe()

# User parameters from the recipe settings. "Target" is the name of the column
# to predict; the remaining values are forwarded to FeatureSelectionGA.generate().
recipe_config = get_recipe_config()
target = recipe_config["Target"]
# Values are cast explicitly to int/float before use; the tuple below keeps
# the same read order as the parameters are listed.
n_pop, cxpb, mutxpb, n_gen = (
    cast(recipe_config[key])
    for key, cast in (
        ("n_pop", int),
        ("cxpb", float),
        ("mutxpb", float),
        ("n_gen", int),
    )
)

# Perform feature selection: every column except the target is a candidate
# feature, and the target column's raw values are passed alongside.
candidate_features = df.drop(target, axis=1)
target_values = df[target].values
fsga = FeatureSelectionGA(candidate_features, target_values, verbose=True)
print("pipeline set")
pop = fsga.generate(n_pop=n_pop, cxpb=cxpb, mutxpb=mutxpb, n_gen=n_gen)
print("pipeline done")

# fsga.best_ind is applied as a positional mask over the columns of fsga.df_X:
# a column is kept when its matching entry is truthy.
all_columns = fsga.df_X.columns.tolist()
col_to_keep = [
    column for column, keep in zip(all_columns, fsga.best_ind) if keep
]
print(col_to_keep)

# Output = the kept feature columns followed by the target column.
result = pd.concat([fsga.df_X[col_to_keep], df[target]], axis=1)

# IO: write dataset with selected features (schema is taken from the dataframe)
output_dataset_name = get_output_names_for_role('output_dataset')[0]
dataiku.Dataset(output_dataset_name).write_with_schema(result)

# IO: dump a JSON summary of this run (selected features plus the settings
# used) so a later transform step can re-apply the same column selection.
output_info_json = {
    "selected_features": col_to_keep,
    "n_pop": n_pop,
    "cxpb": cxpb,
    "mutxpb": mutxpb,
    "n_gen": n_gen,
    "dataset_name": input_dataset.full_name,
    # Naive local wall-clock time, stored as its default string form.
    "timestamp": str(datetime.now()),
}

output_folder_name = get_output_names_for_role('output_folder')[0]
# NOTE(review): get_path() is expected to work only for managed folders that
# live on the local filesystem - confirm the output folder's connection type.
output_folder_path = dataiku.Folder(output_folder_name).get_path()

# Build the file path with os.path.join rather than manual "/" concatenation
# so the separator is always correct for the host platform.
with open(os.path.join(output_folder_path, "output.json"), 'w') as fp:
    json.dump(output_info_json, fp)