import numpy as np # imports a fast numerical programming library
import scipy as sp #imports stats functions, amongst other things
import matplotlib as mpl # this actually imports matplotlib
import matplotlib.cm as cm #allows us easy access to colormaps
import matplotlib.pyplot as plt #sets up plotting under plt
# plt.style.use('ggplot')
from itertools import islice
import pandas as pd #lets us handle data as dataframes
from numpy import argmax
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from matplotlib import pyplot
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from scipy.interpolate import interp1d
from numpy import sqrt
from matplotlib import pyplot
from matplotlib import gridspec
# sns.set_style("whitegrid")
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
# Classifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

# Result Analysis
from sklearn.externals import joblib
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics


from tqdm import tqdm
from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()

# Some useful library
import os
from os.path import isfile

import json
import collections
import re, time , ntpath
import logging
import random
import glob
from pprint import pprint
from copy import deepcopy
from os import listdir
from collections import Counter
from pprint import pprint

from IPython.utils import io
from IPython.display import HTML, display
from ipywidgets import interact, Layout, HBox, VBox, Box
import ipywidgets as widgets
from IPython.display import clear_output

# from scipy.spatial.distance import jensenshannon
# from spacy.lang.en.stop_words import STOP_WORDS
# import en_core_sci_lg
import string



import pandas as pd
import re
import os
# For Ploting
import seaborn as sns

# Word Embedding
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import KFold

# File save
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import seaborn as sns
import sys
import warnings
from matplotlib import pyplot as plt 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# Silence every warning for the rest of the process, but only when the user
# did not pass explicit -W options on the command line (sys.warnoptions is
# empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
from sklearn.model_selection import GridSearchCV 
import os
from pathlib import Path
# Working directory at import time, captured as a pathlib.Path.
path = Path(os.getcwd())
# print(path)


def _select_features(ranked_weights, ranked_features, cutoff, reduction_type):
    """Return the feature names kept for one step of the sweep.

    In 'Threshold' mode a feature is kept when its weight is >= ``cutoff``;
    in any other mode the first ``cutoff`` names of ``ranked_features`` are kept.
    """
    if reduction_type == 'Threshold':
        # NOTE(review): the comparison uses the signed weight exactly as given.
        # If magnitude-based filtering is intended, the caller must pass
        # absolute weights.
        return [name for name, weight in ranked_weights.items() if weight >= cutoff]
    return ranked_features[0:cutoff]


def _cross_validate_linear_svc(features, labels, c_value, class_weight):
    """Run 5-fold cross-validation of a LinearSVC and collect per-fold results.

    Returns a tuple ``(coef_sum, accuracy, f1, precision, recall)``.
    ``coef_sum`` is the element-wise sum over the folds of ``clf.coef_[0]``
    (a sum, not a mean); the other four entries are lists holding one score
    per fold. Accuracy is expressed in percent, the other scores are the raw
    values returned by scikit-learn.
    """
    coef_sum = np.zeros(features.shape[1])
    accuracy, f1_values, precision, recall = [], [], [], []

    splitter = KFold(n_splits=5, shuffle=True, random_state=32)
    for train_index, test_index in splitter.split(features):
        y_train, y_test = labels[train_index], labels[test_index]

        # The scaler is fitted on the training fold only and then applied to
        # the held-out fold.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(features[train_index])
        x_test = scaler.transform(features[test_index])

        clf = LinearSVC(random_state=32, tol=1e-5, C=c_value, class_weight=class_weight)
        clf.fit(x_train, y_train.ravel())
        y_pred = clf.predict(x_test)

        # Only the first row of coef_ is accumulated.
        coef_sum = coef_sum + clf.coef_[0]

        accuracy.append(clf.score(x_test, y_test.ravel()) * 100)
        f1_values.append(f1_score(y_test, y_pred, average="weighted"))
        precision.append(precision_score(y_test, y_pred, average="macro"))
        recall.append(recall_score(y_test, y_pred, average="macro"))

    return coef_sum, accuracy, f1_values, precision, recall


def ModelRunOverThresholds(df, genre, dict_feature_2_weight, optimal_grid_value, weights, reduction_type):
    """Evaluate a LinearSVC on progressively reduced feature sets.

    The features in ``dict_feature_2_weight`` are ranked by decreasing weight.
    A sweep is then run, and for every step a LinearSVC is cross-validated
    (5 folds) on the selected feature columns of ``df``:

    * ``reduction_type == 'Threshold'``: 20 evenly spaced cutoffs in
      ``[0, max_weight)``; a feature is kept when its weight is >= the cutoff.
    * any other value: the top-K ranked features are kept, with K running
      from 50 up to the number of features (or just K = number of features
      when fewer than 50 are available).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain one column per feature name plus the columns
        ``'outcome'`` (used as the target) and ``'genre'``. No filtering by
        genre is done here; presumably the caller passes an already
        genre-specific frame.
    genre : hashable
        Key used to look up ``C`` in ``optimal_grid_value``; also written to
        the ``Genre`` column of the result.
    dict_feature_2_weight : dict
        Maps feature (column) name to its weight.
    optimal_grid_value : mapping
        Maps ``genre`` to the ``C`` value used for the LinearSVC.
    weights :
        Forwarded unchanged as ``class_weight`` to the LinearSVC.
    reduction_type : str
        ``'Threshold'`` for the weight-cutoff sweep, anything else for the
        top-K sweep.

    Returns
    -------
    pandas.DataFrame
        One row per sweep step with the columns ``Genre``, ``Threshold``
        (the cutoff, or K in top-K mode), ``Accuracy`` (mean fold accuracy in
        percent), ``F1``, ``Precision``, ``Recall`` (mean fold scores),
        ``Feature_Length`` and ``Features`` (dict of feature name to the
        coefficient summed over the folds, ordered by decreasing value).
        All columns have object dtype.

    Raises
    ------
    ValueError
        If ``dict_feature_2_weight`` is empty.
    KeyError
        If ``genre`` is missing from ``optimal_grid_value`` or a required
        column is missing from ``df``.
    """
    # Imported once per call instead of once per sweep step.
    from sklearn.preprocessing import normalize

    if not dict_feature_2_weight:
        raise ValueError("dict_feature_2_weight must contain at least one feature")

    # Rank the features by decreasing weight.
    ranked_weights = dict(
        sorted(dict_feature_2_weight.items(), key=lambda item: item[1], reverse=True)
    )
    ranked_features = list(ranked_weights)

    if reduction_type == 'Threshold':
        # endpoint=False keeps every cutoff strictly below the maximum weight
        # whenever that maximum is positive, so the top feature always passes.
        sweep = np.linspace(0.0, max(ranked_weights.values()), 20, endpoint=False)
    else:
        feature_count = len(ranked_features)
        sweep = np.arange(min(50, feature_count), feature_count + 1, 1)

    c_value = optimal_grid_value[genre]

    rows = []
    for cutoff in sweep:
        selected = _select_features(ranked_weights, ranked_features, cutoff, reduction_type)

        # 'outcome' and 'genre' are appended last so they can be sliced off by
        # position below.
        data = df[selected + ['outcome', 'genre']].values
        features = data[:, 0:-2].astype(float)
        labels = data[:, -2]

        # Column-wise max normalisation over the whole data set, done before
        # the folds are split.
        features = normalize(features, axis=0, norm='max')
        encoded_labels = preprocessing.LabelEncoder().fit_transform(labels)

        coef_sum, accuracy, f1_values, precision, recall = _cross_validate_linear_svc(
            features, encoded_labels, c_value, weights
        )

        # Feature -> summed coefficient, ordered by decreasing coefficient.
        feature_weights = dict(
            sorted(zip(selected, coef_sum), key=lambda item: item[1], reverse=True)
        )

        rows.append([
            genre,
            cutoff,
            np.asarray(accuracy).mean(),
            np.array(f1_values).mean(),
            np.array(precision).mean(),
            np.array(recall).mean(),
            len(feature_weights),
            feature_weights,
        ])

    column_names = ["Genre", "Threshold", "Accuracy", "F1", "Precision", "Recall", "Feature_Length", "Features"]
    # dtype=object keeps the stored values untouched instead of letting pandas
    # infer a numeric dtype per column.
    return pd.DataFrame(rows, columns=column_names, dtype=object)

