import numpy as np # imports a fast numerical programming library
import scipy as sp #imports stats functions, amongst other things
import matplotlib as mpl # this actually imports matplotlib
import matplotlib.cm as cm #allows us easy access to colormaps
import matplotlib.pyplot as plt #sets up plotting under plt
# plt.style.use('ggplot')
from itertools import islice
import pandas as pd #lets us handle data as dataframes
from numpy import argmax
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from matplotlib import pyplot
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from scipy.interpolate import interp1d
from numpy import sqrt
from matplotlib import pyplot
from matplotlib import gridspec
# sns.set_style("whitegrid")
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
# Classifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

# Result Analysis
from sklearn.externals import joblib
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics


# Progress-bar support for loops and pandas operations.
from tqdm import tqdm
# NOTE(review): `tqdm._tqdm_notebook` is a private module path; newer tqdm
# releases appear to expose the same class via `tqdm.notebook` -- confirm
# against the installed tqdm version before changing.
from tqdm._tqdm_notebook import tqdm_notebook
# Import-time side effect: hooks tqdm into pandas (presumably enabling the
# `progress_apply`-style methods on DataFrame/Series -- verify in tqdm docs).
tqdm_notebook.pandas()

# Some useful libraries
import os
from os.path import isfile

import json
import collections
import re, time , ntpath
import logging
import random
import glob
from pprint import pprint
from copy import deepcopy
from os import listdir
from collections import Counter
from pprint import pprint

from IPython.utils import io
from IPython.display import HTML, display
from ipywidgets import interact, Layout, HBox, VBox, Box
import ipywidgets as widgets
from IPython.display import clear_output

# from scipy.spatial.distance import jensenshannon
# from spacy.lang.en.stop_words import STOP_WORDS
# import en_core_sci_lg
import string
#Collin Reinking
#collin.reinking@berkeley.edu


import pandas as pd
import re
import os
# For plotting
import seaborn as sns

# Word Embedding
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import KFold

# File save
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import seaborn as sns
import sys
import warnings
from matplotlib import pyplot as plt 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# Silence every warning for the whole process, unless the interpreter was
# started with explicit warning options (in which case those are respected).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
from sklearn.model_selection import GridSearchCV 
import os
from pathlib import Path
# Current working directory captured once, at import time, as a Path object.
path = Path(os.getcwd())
# print(path)

def ExtractGenrewiseFeatureWeights(df, genre, FeatureRange, optimal_grid_value, weights, n_splits=5):
    """Rank features by the magnitude of their cross-validated LinearSVC weight.

    A ``LinearSVC`` is trained on each training split of a shuffled K-fold
    partition of ``df``.  The signed coefficient vectors of the folds are
    summed, divided by the number of folds, and the absolute value of that
    mean is reported per feature.  Because the sign is dropped only *after*
    averaging, a feature whose weight flips sign between folds partially
    cancels out.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to train on.  No filtering by ``genre`` happens here, so the
        caller presumably passes rows that already belong to ``genre``
        (TODO confirm against callers).
    genre : hashable
        Key used only to look up the regularisation strength ``C`` in
        ``optimal_grid_value``.
    FeatureRange : mapping
        Must contain ``'start_idx_features'`` and ``'end_idx_features'``
        (positional column slice, end exclusive) and ``'idx_outcome'``
        (positional index of the label column).
    optimal_grid_value : mapping
        Maps ``genre`` to the ``C`` value handed to ``LinearSVC``.
    weights : dict, 'balanced' or None
        Forwarded unchanged as ``class_weight`` to ``LinearSVC``.  Labels are
        label-encoded before fitting, so a dict is presumably keyed by the
        encoded integer classes (verify against callers).
    n_splits : int, optional
        Number of K-fold splits; also the divisor used for averaging.
        Defaults to 5, the value that used to be hard-coded.

    Returns
    -------
    dict
        ``{column name: abs(mean weight)}`` ordered from the largest to the
        smallest value.

    Raises
    ------
    KeyError
        If ``genre`` is missing from ``optimal_grid_value`` or a required key
        is missing from ``FeatureRange``.
    """
    # `normalize` is not among the module-level imports, so import it locally.
    from sklearn.preprocessing import LabelEncoder, normalize

    start_idx_features = FeatureRange['start_idx_features']
    end_idx_features = FeatureRange['end_idx_features']
    idx_outcome = FeatureRange['idx_outcome']

    features = df.columns[start_idx_features:end_idx_features]
    cx = optimal_grid_value[genre]

    data = df.values
    XX = data[:, start_idx_features:end_idx_features].astype(float)
    YY = data[:, idx_outcome]

    # Column-wise max-norm scaling, applied once to the full matrix before
    # the folds are drawn.
    XX = normalize(XX, axis=0, norm='max')

    # Encode class labels as integers.
    encoded_YY = LabelEncoder().fit_transform(YY)

    # Running sum of the per-fold signed coefficients, one slot per feature.
    weight_sum = np.zeros(len(features))

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=28)
    for train_index, _ in kfold.split(XX):
        # Only the training split is needed: the held-out score was never
        # used, so it is no longer computed.
        x_train = StandardScaler().fit_transform(XX[train_index])

        clf = LinearSVC(random_state=0, tol=1e-5, C=cx, class_weight=weights)
        clf.fit(x_train, encoded_YY[train_index])

        # coef_[0] is the full weight vector in the binary case; with more
        # than two classes it would only be the first class's weights.
        weight_sum += clf.coef_[0]

    # Absolute value of the fold-averaged weight for each feature.
    mean_abs_weight = np.abs(weight_sum / n_splits)
    dict_feature_2_weight = dict(zip(features, mean_abs_weight))

    # Order features by decreasing weight (ties keep their column order).
    return dict(sorted(dict_feature_2_weight.items(),
                       key=lambda item: item[1], reverse=True))
