  # Remember the current warning level, then silence all warnings for the rest
  # of the script (a negative `warn` makes R ignore every warning).
  # NOTE(review): `oldw` is presumably used further down to restore the option
  # (options(warn = oldw)) -- that code is not visible in this section; confirm.
  oldw <- getOption("warn")
  options(warn = -1)
  
  library(tidyverse)
  library(yardstick)
  library(catboost)
  library(caret)
  require(Matrix)
  require(data.table)
  library(strex)
  
  # Work relative to the directory of this script when it is open in RStudio.
  # rstudioapi only works inside an RStudio session, and it reports an empty
  # path for an unsaved document or when the console is the active context.
  # In those cases keep the current working directory instead of aborting, so
  # the script can also be run with Rscript / source() from its own directory.
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable() &&
      nzchar(rstudioapi::getActiveDocumentContext()$path)) {
    setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
  } else {
    message("Script location unavailable; keeping working directory: ",
            getwd())
  }
  getwd()
  
  # Combine class probabilities with the true labels in one data frame.
  #
  # Args:
  #   df:          vector of true class codes, one per row of `model_proba`
  #                ("2" = pronoun, "1" = name, "0" = description). Despite its
  #                name it is used as a plain vector, not as a data frame.
  #   model_proba: object that as.data.frame() turns into a data frame with
  #                columns V1, V2, V3 (e.g. an unnamed three-column matrix);
  #                V1 = description, V2 = name, V3 = pronoun.
  #
  # Returns:
  #   A data.frame with the probability columns renamed to description_prob,
  #   name_prob and pronoun_prob, followed by the factors `class_prediction`
  #   (column with the highest probability) and `original_value` (recoded
  #   `df`). Both factors always carry the levels pronoun, name, description
  #   in that order, even when a class does not occur, so they can be compared
  #   level by level. Codes outside the mapping become NA.
  catboost_predictions <- function(df, model_proba) {
    class_labels <- c("pronoun", "name", "description")
    proba_cols <- c("V3", "V2", "V1")
    true_codes <- c("2", "1", "0")

    proba <- as.data.frame(model_proba)
    missing_cols <- setdiff(proba_cols, colnames(proba))
    if (length(missing_cols) > 0) {
      stop("model_proba is missing probability column(s): ",
           paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    # max.col() breaks ties at random by default; take the first maximum so
    # repeated runs give the same prediction.
    winner <- colnames(proba)[max.col(as.matrix(proba), ties.method = "first")]

    proba$class_prediction <- factor(winner,
                                     levels = proba_cols,
                                     labels = class_labels)
    proba$original_value <- factor(as.character(df),
                                   levels = true_codes,
                                   labels = class_labels)

    prob_idx <- match(proba_cols, names(proba))
    names(proba)[prob_idx] <- paste0(class_labels, "_prob")
    proba
  }
  
  # Summarise classification quality for a table of predictions.
  #
  # Args:
  #   x: data frame with the columns `original_value` (truth) and
  #      `class_prediction` (estimate).
  #
  # Returns:
  #   The summary() of the confusion matrix without its `.estimator` column,
  #   restricted to the rows for accuracy, kap, precision, recall and f_meas.
  acc_table <- function(x) {
    confusion <- conf_mat(x, original_value, class_prediction)
    metric_rows <- summary(confusion)
    metric_rows <- dplyr::select(metric_rows, -.estimator)
    filter(
      metric_rows,
      .metric %in% c("accuracy", "kap", "precision", "recall", "f_meas")
    )
  }
  
  # Reduce the training rows to one referring expression per
  # entity / reftype / feature combination.
  #
  # Args:
  #   df: data frame with the columns ID, type, entity, refex, reftype and any
  #       number of further (feature) columns.
  #
  # Steps:
  #   1. keep rows with type == "train" and drop ID and type;
  #   2. paste every column other than entity, refex and reftype into
  #      `concat_feat` (separator "-");
  #   3. count how often each (entity, refex, reftype, concat_feat) row occurs
  #      (`occurrence`) and drop exact duplicates;
  #   4. build the key `unique_entity_reftype_feat` from entity, reftype and
  #      concat_feat and number the keys (`entity_reftype_id`);
  #   5. per key, keep the row that comes first after sorting by descending
  #      `occurrence`.
  #
  # Returns:
  #   An ungrouped data frame with one row per `unique_entity_reftype_feat`.
  unity_formation <- function(df) {
    counted <- df %>%
      filter(type == "train") %>%
      select(-c(ID, type)) %>%
      # Pick the feature columns by exclusion rather than by position
      # (formerly 4:ncol(.)): this no longer depends on entity, refex and
      # reftype being the first three columns and avoids the descending
      # sequence 4:3 when there are no feature columns.
      unite("concat_feat", -c(entity, refex, reftype),
            remove = TRUE, sep = "-") %>%
      group_by(entity, refex, reftype, concat_feat) %>%
      mutate(occurrence = n()) %>%
      ungroup() %>%
      distinct() # drops the exact repetitions

    keyed <- counted %>%
      unite("unique_entity_reftype_feat", c(entity, reftype, concat_feat),
            remove = FALSE, sep = "-") %>%
      group_by(unique_entity_reftype_feat) %>%
      mutate(entity_reftype_id = cur_group_id()) %>%
      ungroup()

    # The sort is global, so distinct() keeps the most frequent row per key.
    keyed %>%
      arrange(desc(occurrence)) %>%
      distinct(unique_entity_reftype_feat, .keep_all = TRUE)
  }
  
  # Feature-free variant of the reduction: one referring expression per
  # entity / reftype combination.
  #
  # Args:
  #   df: data frame with at least the columns ID, type, entity, refex and
  #       reftype.
  #
  # Returns:
  #   An ungrouped data frame with one row per `unique_entity_reftype_feat`
  #   (entity and reftype joined by "-"), carrying `occurrence` (row count of
  #   the entity / refex / reftype combination) and `entity_reftype_id`
  #   (group number of the key). Per key, the row that comes first after
  #   sorting by descending `occurrence` is kept.
  unity_formation_0 <- function(df) {
    train_rows <- df %>%
      filter(type == "train") %>%
      select(-c(ID, type))

    counted <- train_rows %>%
      group_by(entity, refex, reftype) %>%
      mutate(occurrence = n()) %>%
      ungroup() %>%
      distinct() # drops the exact repetitions (compared on all columns)

    keyed <- counted %>%
      unite("unique_entity_reftype_feat", c(entity, reftype),
            remove = FALSE, sep = "-") %>%
      group_by(unique_entity_reftype_feat) %>%
      mutate(entity_reftype_id = cur_group_id()) %>%
      ungroup()

    # The sort is global, so distinct() keeps the most frequent row per key.
    keyed %>%
      arrange(desc(occurrence)) %>%
      distinct(unique_entity_reftype_feat, .keep_all = TRUE)
  }
  
  # Hyper-parameters of the selected ("best") CatBoost configuration.
  # NOTE(review): presumably handed to catboost.train() as `params` later in
  # the script -- the call is not visible in this section; confirm.
  catboost_fit_params_best <- list(
    iterations       = 1500,
    thread_count     = 10,
    loss_function    = "MultiClass",
    ignored_features = c(4, 9),
    border_count     = 32,
    depth            = 5,
    learning_rate    = 0.03,
    l2_leaf_reg      = 3.5,
    train_dir        = "train_dir",
    use_best_model   = TRUE
  )
  


  
  ############load featureset --------------------
  
  # Load the WebNLG feature table and normalise its reference-type columns.
  # The assignments inside mutate() run in order, each one seeing the result
  # of the previous one:
  #   - reftype entries equal to "NULL" become "empty";
  #   - prev_reftype entries equal to "NULL" become "first_mention";
  #   - both columns are flattened with unlist();
  #   - reftype "demonstrative" is folded into "description".
  # NOTE(review): the unlist() calls suggest both columns arrive as list
  # columns -- confirm against the .rds file.
  webnlg_only_features <-
    readRDS("webnlg_tabular_data/webnlg_only_features.rds") %>%
    mutate(
      reftype = ifelse(reftype == "NULL", "empty", reftype),
      reftype = unlist(reftype),
      prev_reftype = ifelse(prev_reftype == "NULL", "first_mention",
                            prev_reftype),
      prev_reftype = unlist(prev_reftype),
      reftype = ifelse(reftype == "demonstrative", "description", reftype)
    )