CLUSTER_OUTLIERS = "cluster_outliers"

# Name of the preprocessing logger, followed by the list of logger names
# grouped with it (the preprocessing logger itself is the first entry).
PREPROCESSING_LOGGER_NAME = "dku.ml.preprocessing"
PREPROCESSING_RELATED_LOGGER_NAMES = [
    PREPROCESSING_LOGGER_NAME,
    "dataiku.doctor.multiframe",
    "dataiku.doctor.utils",
    "dataiku.doctor.prediction.common",
]

# Variable types
CATEGORY = "CATEGORY"
NUMERIC = "NUMERIC"
TEXT = "TEXT"
VECTOR = "VECTOR"
IMAGE = "IMAGE"

# Prediction types
MULTICLASS = "MULTICLASS"
REGRESSION = "REGRESSION"
BINARY_CLASSIFICATION = "BINARY_CLASSIFICATION"
CLUSTERING = "CLUSTERING"
TIMESERIES_FORECAST = "TIMESERIES_FORECAST"
CAUSAL_BINARY_CLASSIFICATION = "CAUSAL_BINARY_CLASSIFICATION"
CAUSAL_REGRESSION = "CAUSAL_REGRESSION"

# Groupings of the prediction types above, usable for membership checks.
# The causal types each appear in two sets: the causal set and one of the
# regression / classification sets.
CAUSAL_PREDICTION_TYPES = {
    CAUSAL_BINARY_CLASSIFICATION,
    CAUSAL_REGRESSION,
}

# Time-series forecasting is grouped with the regression types.
REGRESSION_TYPES = {
    REGRESSION,
    CAUSAL_REGRESSION,
    TIMESERIES_FORECAST,
}
CLASSIFICATION_TYPES = {
    BINARY_CLASSIFICATION,
    MULTICLASS,
    CAUSAL_BINARY_CLASSIFICATION,
}

# Deep Hub image identifiers (not members of any of the sets above)
DEEP_HUB_IMAGE_CLASSIFICATION = "DEEP_HUB_IMAGE_CLASSIFICATION"
DEEP_HUB_IMAGE_OBJECT_DETECTION = "DEEP_HUB_IMAGE_OBJECT_DETECTION"

# NOTE(review): the grouping comments in this section are inferred from the
# constant names only; confirm against the code that consumes them.

# Handling values (presumably for missing values)
NONE = "NONE"
IMPUTE = "IMPUTE"
DROP_ROW = "DROP_ROW"
KEEP_NAN_OR_IMPUTE = "KEEP_NAN_OR_IMPUTE"
KEEP_NAN_OR_DROP = "KEEP_NAN_OR_DROP"

NUM_IMPUTED_KEPT = "NUM_IMPUTED_KEPT"
NUM_IMPUTED_NOT_KEPT = "NUM_IMPUTED_NOT_KEPT"

# Upper-case method values
DUMMIFY = "DUMMIFY"
UNFOLD = "UNFOLD"
IMPACT = "IMPACT"
ORDINAL = "ORDINAL"
FREQUENCY = "FREQUENCY"
TERM_HASH = "TERM_HASH"

# Lower-case strings (presumably parameter / dictionary keys)
CATEGORY_HANDLING = "category_handling"
NUMERICAL_HANDLING = "numerical_handling"
TEXT_HANDLING = "text_handling"
TARGET_VARIABLE = "target_variable"
TREATMENT_VARIABLE = "treatment_variable"
PREDICTION_TYPE = "prediction_type"
PREDICTION_VARIABLE = "prediction_variable"
PROBA_COLUMNS = "proba_cols"  # value is abbreviated, unlike the constant name
CATEGORY_POSSIBLE_VALUES = "category_possible_values"
CATEGORY_POSSIBLE_COUNTS = "category_possible_counts"
CATEGORY_NEED_OTHERS = "category_need_others"

GENERATE_DERIVATIVE = "generate_derivative"
RESCALING = "rescaling"
RESCALING_METHOD = "rescaling_method"
TERM_HASH_SIZE = "term_hash_size"
MINMAX = "MINMAX"
AVGSTD = "AVGSTD"
MISSING_HANDLING = "missing_handling"
PER_FEATURE = "per_feature"
STATS = "stats"
STOP_SEARCH_FILENAME = "stop_search"

# Calibration
SIGMOID = "SIGMOID"
ISOTONIC = "ISOTONIC"
# NOTE(review): presumably the default fraction of data set aside for
# calibration -- confirm against the code that reads it.
DEFAULT_CALIBRATION_DATA_RATIO = 0.2

# Names whose string value equals the constant name
DKU_CURRENT_ANALYSIS_ID = "DKU_CURRENT_ANALYSIS_ID"
DKU_CURRENT_MLTASK_ID = "DKU_CURRENT_MLTASK_ID"

# Placeholder string (NOTE(review): presumably substituted for missing values)
FILL_NA_VALUE = "__DKU_N/A__"

# Reasons for no preprocessing result
PREPROC_FAIL = "FAIL"
PREPROC_DROPPED = "DROPPED"
PREPROC_NOTARGET = "NOTARGET"
PREPROC_ONECLASS = "ONECLASS"

EVALUATION_PARAMS = "evaluationParams"

# Time-series forecasting
# Most values below are camelCase strings (NOTE(review): presumably keys of
# the forecasting parameters -- confirm against the consumer).
TIME_VARIABLE = "timeVariable"
TIMESERIES_IDENTIFIER_COLUMNS = "timeseriesIdentifiers"
PREDICTION_LENGTH = "predictionLength"
QUANTILES = "quantilesToForecast"

# Time step description
TIME_STEP_PARAMS = "timestepParams"
TIME_UNIT = "timeunit"
NUMBER_OF_TIME_UNITS = "numberOfTimeunits"
UNIT_ALIGNMENT = "unitAlignment"
MONTHLY_ALIGNMENT = "monthlyAlignment"
END_OF_WEEK_DAY = "endOfWeekDay"

# Sampling and train/test splitting
TIMESERIES_SAMPLING = "timeseriesSampling"
TEST_SIZE = "testSize"
GAP_SIZE = "gapSize"
SKIP_TOO_SHORT_TIMESERIES_FOR_TRAINING = "skipTooShortTimeseriesForTraining"
CUSTOM_TRAIN_TEST_SPLIT = "customTrainTestSplit"
CUSTOM_TRAIN_TEST_INTERVALS = "customTrainTestIntervals"

# File name, unlike the strings above
ROLLING_WINDOWS_RESOURCES = "windows_resources.json"

# Causal scoring: treatment assignment
SAMPLE_RATIO_EXACT = "SAMPLE_RATIO_EXACT"
SAMPLE_RATIO_APPROX = "SAMPLE_RATIO_APPROX"
THRESHOLD = "THRESHOLD"

# Causal learning methods
META_LEARNER = "META_LEARNER"

S_LEARNER = "S_LEARNER"
T_LEARNER = "T_LEARNER"
X_LEARNER = "X_LEARNER"

# Causal metrics weighting
INVERSE_PROPENSITY = "INVERSE_PROPENSITY"

# Managed folder
IMPUTE_INVALID_PATHS = "impute_invalid_paths"

# Multiclass metrics averaging method
CLASS_AVERAGING_WEIGHTED = "WEIGHTED"
CLASS_AVERAGING_MACRO = "MACRO"

# Very large finite float.
# NOTE(review): presumably a stand-in for infinity in JSON payloads, since
# JSON has no infinity literal -- confirm against the serialisation code.
DKU_JSON_INFINITY = 1e99