# -*- coding: UTF-8 -*-

from classifier import Classifier, classification_report, np, make_pipeline
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

class SVM(Classifier):
    """Linear SVM text classifier tuned with an exhaustive grid search.

    Wraps ``sklearn.svm.LinearSVC`` and relies on the ``Classifier`` base
    class for ``self.vectorizer``, ``self.train_set``, ``self.test_set`` and
    ``self.vectorize()`` (none of which are defined in this file).
    """

    def __init__(self):
        """Create the base LinearSVC estimator (one-vs-rest, C=1.0)."""
        super().__init__()
        self.model = LinearSVC(multi_class='ovr', tol=0.01, C=1.0, max_iter=100000)
        # Remembers whether the fitted model embeds the vectorizer, so that
        # predict() knows whether it still has to vectorize the test set.
        self._pipelined = False


    def fit(self, pipelining=False):
        """Grid-search the SVM hyper-parameters on the training set.

        After this call ``self.model`` is the fitted ``GridSearchCV`` object.

        Args:
            pipelining: When True, the vectorizer and the SVM are chained in a
                single pipeline and tuned together on ``self.train_set.X`` as
                stored (scored by accuracy). When False, ``self.vectorize()``
                is called first and only the SVM is tuned (scored by balanced
                accuracy).
        """
        if pipelining:
            self.param_grid = {'linearsvc__loss': ('squared_hinge', 'hinge'), 'linearsvc__tol': (0.01, 0.05, 0.001 ,0.005, 0.5, 0.1), 'linearsvc__C': (0.01, 0.05, 0.001, 0.005, 0.5, 0.1)}
            self.pipeline = make_pipeline(self.vectorizer, self.model)
            # Optional extension: also tune the vectorizer's document-frequency cut-offs.
            #self.param_grid.update({'tfidfvectorizer__max_df': np.linspace(.3, .7, 5), 'tfidfvectorizer__min_df': np.linspace(.3, .7, 5)})
            self.model = GridSearchCV(self.pipeline, self.param_grid, cv=10, scoring='accuracy', verbose=5)
            self.model.fit(self.train_set.X, self.train_set.Y)
            # make_pipeline names each step after its lower-cased class name,
            # so this lookup assumes self.vectorizer is a TfidfVectorizer.
            self.vectorizer = self.model.best_estimator_['tfidfvectorizer']
            self._pipelined = True
        else:
            self.param_grid = {'tol': [1.0, 0.5, 0.1, 0.01, 0.05, 0.001, 0.005], 'C': [1.0, 0.5, 0.1, 0.01, 0.05, 0.001, 0.005]}
            # Presumably fits the vectorizer and transforms train_set.X in
            # place -- defined on the base class, confirm there.
            self.vectorize()
            self.model = GridSearchCV(self.model, self.param_grid, cv=10, scoring=('balanced_accuracy'), verbose=5)
            self.model.fit(self.train_set.X, self.train_set.Y)
            self._pipelined = False
        print(self.model.best_params_)


    def predict(self):
        """Classify the test set and return a text classification report.

        The test documents are vectorized exactly once: by the fitted pipeline
        when ``fit(pipelining=True)`` was used, otherwise by
        ``self.vectorizer`` here. ``self.test_set.X`` is left untouched, so
        the method can be called repeatedly.

        Returns:
            The string produced by ``classification_report`` (4 digits),
            restricted to the labels that were actually predicted.
        """
        if getattr(self, '_pipelined', False):
            # The grid-searched pipeline already contains the vectorizer;
            # transforming here as well would vectorize the data twice.
            features = self.test_set.X
        else:
            features = self.vectorizer.transform(self.test_set.X)
        y_pred = self.model.predict(features)
        return classification_report(self.test_set.Y, y_pred, labels=np.unique(y_pred), digits=4)


def main():
    """Train an SVM with the default (non-pipelined) search and print its report.

    ``load_dataset`` and ``split_dataset`` are not defined on ``SVM`` in this
    file; they are inherited from the ``Classifier`` base class.
    """
    classifier = SVM()
    classifier.load_dataset()
    classifier.split_dataset()
    classifier.fit()
    print(classifier.predict())


# Run the train/evaluate demo only when executed as a script, not on import.
if __name__ == '__main__':
    main()
