library(gbm)
library(caret)
library(dataiku)

# Recipe inputs
# Load the "train" dataset through the dataiku package. The result is indexed
# by column name further down (presumably a data.frame -- TODO confirm).
df <- dkuReadDataset("train")

#-----------------------------------------------------------------
# Settings
#-----------------------------------------------------------------
# Column holding the class label to predict.
target.variable <- "Churn"

# Predictor columns that the preprocessing step casts to double.
features.num <- c(
    "Account_Length",
    "VMail_Message",
    "Day_Mins", "Day_Calls", "Day_Charge",
    "Eve_Mins", "Eve_Calls", "Eve_Charge",
    "Night_Mins", "Night_Calls", "Night_Charge",
    "Intl_Mins", "Intl_Calls", "Intl_Charge",
    "CustServ_Calls"
)

# Predictor columns that the preprocessing step casts to factor.
features.cat <- c("State", "Area_Code", "Intl_Plan", "VMail_Plan")

#-----------------------------------------------------------------
# Preprocessing
#-----------------------------------------------------------------
# Coerce each column to the type used for modelling. The order mirrors the
# settings: categorical predictors -> factor, numeric predictors -> double,
# target -> factor.
df[features.cat] <- lapply(features.cat, function(col) as.factor(df[[col]]))
df[features.num] <- lapply(features.num, function(col) as.double(df[[col]]))
df[[target.variable]] <- as.factor(df[[target.variable]])

# Training table: only the modelling columns, with the target placed last.
train.ml <- df[, c(features.cat, features.num, target.variable), drop = FALSE]

#-------------------------------------------------------------------------
# TRAINING
#-------------------------------------------------------------------------
# Fix the RNG state so that the cross-validation fold assignment and the
# stochastic gbm fits are repeatable from one run to the next.
set.seed(42)

# Resampling scheme: 3-fold cross-validation, repeated twice. Class
# probabilities are requested because twoClassSummary computes the ROC metric
# from them.
# NOTE(review): with classProbs = TRUE caret expects the target's factor levels
# to be valid R names -- confirm the values of the target column satisfy this.
fit.control <- trainControl(
    method = "repeatedcv",
    number = 3,
    repeats = 2,
    classProbs = TRUE,
    summaryFunction = twoClassSummary
)

# Tuning grid: 4 interaction depths x 10 ensemble sizes (50, 100, ..., 500
# trees) with shrinkage and minimum node size held fixed, i.e. 40 candidates.
gbm.grid <- expand.grid(
    interaction.depth = c(1, 3, 5, 9),
    n.trees = (1:10) * 50,
    shrinkage = 0.1,
    n.minobsinnode = 5
)

# Build "<target> ~ ." from the configured target name instead of hard-coding
# the column, so the Settings section stays the single source of truth.
model.formula <- reformulate(".", response = target.variable)

# Fit the gbm models over the grid and keep the candidate with the best
# resampled ROC value.
gbm.fit <- train(
    model.formula,
    data = train.ml,
    method = "gbm",
    trControl = fit.control,
    tuneGrid = gbm.grid,
    metric = "ROC",
    verbose = FALSE
)


# Recipe outputs
# Resolve the managed folder that receives the serialized model.
model_r <- dkuManagedFolderPath("qz0nSBEJ")

# Stop before deleting anything unless the folder path is a single, non-empty
# string that points at an existing directory.
stopifnot(
    is.character(model_r),
    length(model_r) == 1L,
    nzchar(model_r),
    dir.exists(model_r)
)

# Empty the folder by unlinking explicit paths rather than changing the working
# directory and shelling out to `rm -rf *`: nothing is deleted relative to the
# current directory, no POSIX shell is required, and the working directory is
# left untouched. As with the shell glob, list.files() skips dot-files by
# default.
stale <- list.files(model_r, full.names = TRUE)
unlink(stale, recursive = TRUE)

# Serialize the fitted caret model into the managed folder.
path <- file.path(model_r, "model.RData")
save(gbm.fit, file = path)

