import numpy as np
import pandas as pd
import random
from sklearn import svm
from sklearn.metrics import precision_recall_fscore_support

def train_word2vec_svm_cv(dataset, k, word2vec_mat, features):
    '''
    Evaluate an RBF-kernel SVM on sentence embeddings with k-fold cross
    validation.

    The input vector of a sample is its row of word2vec_mat, optionally
    extended with the values of the `features` columns of `dataset`.

    dataset: DataFrame with a 'label' column and the computed feature
        columns; the row at position p is paired with row p of word2vec_mat
    k: k-fold cross validation (2 <= k <= number of rows in dataset)
    word2vec_mat: 2-D matrix containing the embedding of each sentence
    features: names of the computed feature columns appended to every
        embedding; an empty list (or None) uses the embeddings alone

    Returns a tuple (precision, recall, f1, accuracy), each one the mean
    over the k folds. Precision, recall and f1 are macro-averaged over the
    classes inside every fold.

    Raises ValueError if k is smaller than 2 or larger than the number of
    rows in dataset.
    '''
    N = dataset.shape[0]
    # Fail early with a readable message instead of a ZeroDivisionError or
    # "range() arg 3 must not be zero" coming from the fold arithmetic.
    if k < 2:
        raise ValueError('k must be at least 2, got {!r}'.format(k))
    if k > N:
        raise ValueError(
            'k ({!r}) cannot exceed the number of samples ({})'.format(k, N))

    feature_cols = [] if features is None else list(features)
    embeddings = np.asarray(word2vec_mat)
    labels = dataset['label'].values
    # Pull the extra columns out once; per-cell .iloc lookups inside the
    # fold loop are very slow on large frames.
    extra = dataset[feature_cols].values.astype(float) if feature_cols else None

    # array_split keeps the fold sizes within one sample of each other, so
    # the last fold no longer absorbs the whole remainder of N / k.
    folds = np.array_split(np.random.permutation(N), k)

    p_list = []
    r_list = []
    f_list = []
    acc_list = []
    for fold_no, test_idx in enumerate(folds):
        train_idx = np.concatenate(folds[:fold_no] + folds[fold_no + 1:])

        X_train = embeddings[train_idx]
        X_test = embeddings[test_idx]
        if extra is not None:
            X_train = np.hstack([X_train, extra[train_idx]])
            X_test = np.hstack([X_test, extra[test_idx]])
        y_train = labels[train_idx]
        y_test = labels[test_idx]

        clf = svm.SVC(C=1.0, kernel='rbf', gamma='auto')
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)
        # Both operands are ndarrays, so this is an element-wise comparison.
        acc = np.mean(y_test == y_pred)
        p, r, f, _ = precision_recall_fscore_support(
            y_test, y_pred, average='macro')

        p_list.append(p)
        r_list.append(r)
        f_list.append(f)
        acc_list.append(acc)

    return np.mean(p_list), np.mean(r_list), np.mean(f_list), np.mean(acc_list)

def train_svm_cv(dataset, k, feature):
    '''
    Evaluate an RBF-kernel SVM on computed feature columns with k-fold
    cross validation.

    dataset: DataFrame with a 'label' column and the computed feature columns
    k: k-fold cross validation (2 <= k <= number of rows in dataset)
    feature: names of the computed feature columns involved in prediction;
        a single column name given as a string is accepted as well

    Returns a tuple (precision, recall, f1, accuracy), each one the mean
    over the k folds. Precision, recall and f1 are macro-averaged over the
    classes inside every fold.

    Raises ValueError if no feature column is given, or if k is smaller
    than 2 or larger than the number of rows in dataset.
    '''
    # A bare string is one column name, not a sequence of one-letter names.
    feature_cols = [feature] if isinstance(feature, str) else list(feature)
    if not feature_cols:
        raise ValueError('at least one feature column is required')

    N = dataset.shape[0]
    # Fail early with a readable message instead of a ZeroDivisionError or
    # "range() arg 3 must not be zero" coming from the fold arithmetic.
    if k < 2:
        raise ValueError('k must be at least 2, got {!r}'.format(k))
    if k > N:
        raise ValueError(
            'k ({!r}) cannot exceed the number of samples ({})'.format(k, N))

    # Selecting with a list always yields a 2-D array, so a single feature
    # needs no special case.
    X_all = dataset[feature_cols].values
    y_all = dataset['label'].values

    # array_split keeps the fold sizes within one sample of each other, so
    # the last fold no longer absorbs the whole remainder of N / k.
    folds = np.array_split(np.random.permutation(N), k)

    p_list = []
    r_list = []
    f_list = []
    acc_list = []
    for fold_no, test_idx in enumerate(folds):
        train_idx = np.concatenate(folds[:fold_no] + folds[fold_no + 1:])

        X_train = X_all[train_idx]
        y_train = y_all[train_idx]
        X_test = X_all[test_idx]
        y_test = y_all[test_idx]

        clf = svm.SVC(C=1.0, kernel='rbf', gamma='auto')
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)
        acc = np.mean(y_test == y_pred)
        p, r, f, _ = precision_recall_fscore_support(
            y_test, y_pred, average='macro')

        p_list.append(p)
        r_list.append(r)
        f_list.append(f)
        acc_list.append(acc)

    return np.mean(p_list), np.mean(r_list), np.mean(f_list), np.mean(acc_list)