import numpy as np # imports a fast numerical programming library
import scipy as sp #imports stats functions, amongst other things
import matplotlib as mpl # this actually imports matplotlib
import matplotlib.cm as cm #allows us easy access to colormaps
import matplotlib.pyplot as plt #sets up plotting under plt
# plt.style.use('ggplot')
from itertools import islice
import pandas as pd #lets us handle data as dataframes
from numpy import argmax
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from matplotlib import pyplot
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from scipy.interpolate import interp1d
from numpy import sqrt
from matplotlib import pyplot
from matplotlib import gridspec
# sns.set_style("whitegrid")
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
# Classifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

# Result Analysis
from sklearn.externals import joblib
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics


from tqdm import tqdm
from tqdm._tqdm_notebook import tqdm_notebook
# Register the notebook-flavoured tqdm progress bar with pandas
# (presumably so `progress_apply` is available on DataFrames -- confirm usage elsewhere).
tqdm_notebook.pandas()

# Some useful libraries
import os
from os.path import isfile

import json
import collections
import re, time , ntpath
import logging
import random
import glob
from pprint import pprint
from copy import deepcopy
from os import listdir
from collections import Counter
from pprint import pprint

from IPython.utils import io
from IPython.display import HTML, display
from ipywidgets import interact, Layout, HBox, VBox, Box
import ipywidgets as widgets
from IPython.display import clear_output

# from scipy.spatial.distance import jensenshannon
# from spacy.lang.en.stop_words import STOP_WORDS
# import en_core_sci_lg
import string


import pandas as pd
import re
import os
# For plotting
import seaborn as sns

# Word Embedding
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import KFold

# File save
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import seaborn as sns
import sys
import warnings
from matplotlib import pyplot as plt 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# Suppress every warning for the whole process, but only when no warning
# options were supplied to the interpreter (sys.warnoptions is empty).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
from sklearn.model_selection import GridSearchCV 
import os
from pathlib import Path
# Current working directory at import time, wrapped as a pathlib.Path.
path = Path(os.getcwd())
# print(path)



def PreRecCurve_fold_interpolation(df, axes, n_folds=5):
    """Plot per-fold and fold-averaged precision-recall curves on ``axes``.

    Each fold's precision-recall curve is resampled onto a shared grid of
    100 recall values in [0, 1] so the folds can be averaged point-wise.
    The averaged curve is drawn on top of the per-fold curves, and the grid
    point with the highest F1 score is marked with a red dot.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``Genre`` column (its first value becomes the plot
        title) and, for every index label ``0 .. n_folds - 1``, a ``Y_Test``
        entry holding the true labels and a ``Y_Proba`` entry holding the
        predicted probabilities. ``Y_Proba`` is indexed as ``[:, 1]``, so it
        is assumed to be a 2-D array whose second column is the
        positive-class score -- TODO confirm against the caller.
    axes : matplotlib.axes.Axes
        Axes object to draw on.
    n_folds : int, optional
        Number of folds to read from ``df``. Defaults to 5, the value that
        used to be hard-coded.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1.

    Notes
    -----
    Side effects: sets the seaborn "whitegrid" style, sets the global
    ``figure.dpi`` rcParam to 500, and prints the genre.
    """
    if n_folds < 1:
        raise ValueError("n_folds must be at least 1")

    sns.set_style("whitegrid")
    plt.rcParams["figure.dpi"] = 500

    genre = df.Genre.values[0]
    print(genre)

    pres = []
    mean_rec = np.linspace(0, 1, 100)
    for i in range(n_folds):
        pred_proba = df.loc[i, "Y_Proba"]
        ytest = df.loc[i, "Y_Test"]
        precision, recall, _ = precision_recall_curve(ytest, pred_proba[:, 1])

        # np.interp needs increasing x values, so both arrays are reversed
        # before resampling onto the shared recall grid.
        recall = recall[::-1]
        precision = precision[::-1]
        interp_pre = np.interp(mean_rec, recall, precision)
        # Pin the recall == 0 end of every fold to precision 1.0.
        interp_pre[0] = 1.0
        lab = 'Fold %d AUC=%.4f' % (i+1, auc(mean_rec, interp_pre))

        axes.plot(mean_rec, interp_pre, label=lab, alpha=0.3)

        pres.append(interp_pre)

    mean_pre = np.mean(pres, axis=0)
    lab = 'Overall AUC=%.4f' % (auc(mean_rec, mean_pre))
    axes.plot(mean_rec, mean_pre, label=lab, lw=2, color='#143c7d')

    # The denominator cannot be zero here: index 0 has precision pinned to
    # 1.0 in every fold, and every other grid point has recall > 0.
    fscore = (2 * mean_pre * mean_rec) / (mean_pre + mean_rec)
    # locate the index of the largest f score
    ix = argmax(fscore)
    lab = 'Best '+str(round(fscore[ix], 2))
    axes.scatter(mean_rec[ix], mean_pre[ix], marker='o', color='red', label=lab)
    axes.set_xlabel('Recall', fontsize=13)
    axes.set_ylabel('Precision', fontsize=13)
    axes.legend(loc='lower right', fontsize='small')
    axes.set_title(genre, fontsize=13, y=1.0)
    
    
    
def PreRecCurve_fold_combined(df, axes, n_folds=5):
    """Plot per-fold and pooled precision-recall curves on ``axes``.

    A precision-recall curve is drawn for every fold. The true labels and
    positive-class scores of all folds are then concatenated and a single
    "Overall" curve is computed from the pooled predictions. The point on
    the pooled curve with the highest F1 score is marked with a red dot.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``Genre`` column (its first value becomes the plot
        title) and, for every index label ``0 .. n_folds - 1``, a ``Y_Test``
        entry holding the true labels and a ``Y_Proba`` entry holding the
        predicted probabilities. ``Y_Proba`` is indexed as ``[:, 1]``, so it
        is assumed to be a 2-D array whose second column is the
        positive-class score -- TODO confirm against the caller.
    axes : matplotlib.axes.Axes
        Axes object to draw on.
    n_folds : int, optional
        Number of folds to read from ``df``. Defaults to 5, the value that
        used to be hard-coded.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1.

    Notes
    -----
    Side effects: sets the seaborn "whitegrid" style, sets the global
    ``figure.dpi`` rcParam to 500, and prints the genre.
    """
    if n_folds < 1:
        raise ValueError("n_folds must be at least 1")

    sns.set_style("whitegrid")
    plt.rcParams["figure.dpi"] = 500
    y_real = []
    y_proba = []

    genre = df.Genre.values[0]
    print(genre)
    for i in range(n_folds):
        pred_proba = df.loc[i, "Y_Proba"]
        ytest = df.loc[i, "Y_Test"]
        precision, recall, _ = precision_recall_curve(ytest, pred_proba[:, 1])
        lab = 'Fold %d AUC=%.4f' % (i+1, auc(recall, precision))
        axes.plot(recall, precision, label=lab, alpha=0.5)

        # Collect every fold's labels and scores for the pooled curve below.
        y_real.append(ytest)
        y_proba.append(pred_proba[:, 1])

    y_real = np.concatenate(y_real)
    y_proba = np.concatenate(y_proba)
    precision, recall, _ = precision_recall_curve(y_real, y_proba)
    lab = 'Overall AUC=%.4f' % (auc(recall, precision))
    axes.plot(recall, precision, label=lab, lw=2, color='#143c7d')

    # F1 is 0/0 wherever precision and recall are both zero (e.g. when the
    # highest-scored sample is a negative). A plain division yields NaN
    # there, and argmax would then select the NaN entry instead of the true
    # maximum, so those points are given an F1 of 0 instead.
    denom = precision + recall
    fscore = np.divide(
        2 * precision * recall,
        denom,
        out=np.zeros_like(denom, dtype=float),
        where=denom > 0,
    )
    # locate the index of the largest f score
    ix = argmax(fscore)

    lab = 'Best '+str(round(fscore[ix], 2))
    axes.scatter(recall[ix], precision[ix], marker='o', color='red', label=lab)
    axes.set_xlabel('Recall', fontsize=13)
    axes.set_ylabel('Precision', fontsize=13)
    axes.legend(loc='lower right', fontsize='small')
    axes.set_title(genre, fontsize=13, y=1.0)
    