import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tabularDataImporter import TabularDataImporter
from typing import List, Dict, Tuple
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline, make_pipeline




class PolynomialRegressor:
    """Degree-2 polynomial regression on tabular data loaded from a CSV file.

    The predictors are expanded with ``PolynomialFeatures(degree=2)`` and a
    ``LinearRegression`` is fitted on the expanded training features.  The
    train/test split is produced by the project's ``TabularDataImporter``.
    """

    def __init__(self, csvfile: str, predictors: List[str], response: List[str]) -> None:
        """Load and split the data, and create the (unfitted) estimators.

        Args:
            csvfile: Path handed to ``TabularDataImporter``.
            predictors: Names passed as predictors to
                ``split_train_test_set`` (presumably column names of the
                CSV; confirm against ``TabularDataImporter``).
            response: Names passed as response to ``split_train_test_set``.
        """
        # NOTE: this is the importer object, not a DataFrame.
        self.data = TabularDataImporter(csvfile)
        self.data.preprocess_all()
        self.data.encodeCategorical()

        (
            self.X_train,
            self.X_test,
            self.y_train,
            self.y_test,
        ) = self.data.split_train_test_set(predictors, response)

        self.polynomial_features = PolynomialFeatures(degree=2)
        self.model = LinearRegression()

    def toString(self):
        """Return the display name of this regressor."""
        return "PolynomialRegressor"

    def train(self, epochs=None) -> None:
        """Fit the feature expansion and the linear model on the training set.

        Args:
            epochs: Ignored by this method; kept so existing callers that
                pass it keep working.
        """
        # The only place where the feature transformer is fitted.
        self.X_train_poly = self.polynomial_features.fit_transform(self.X_train)
        self.model.fit(self.X_train_poly, self.y_train)

    def _predict_test_set(self):
        """Expand the test predictors, cache them, and return the predictions."""
        # transform(), not fit_transform(): reuse the expansion fitted in
        # train() instead of re-fitting the transformer on test data.
        self.X_test_poly = self.polynomial_features.transform(self.X_test)
        return self.model.predict(self.X_test_poly)

    def analyse_loss(self):
        """Show a scatter plot of predicted versus observed test responses."""
        y_test_pred = self._predict_test_set()
        # The label gives plt.legend() an artist to show; without it
        # matplotlib warns that no labelled artists were found.
        plt.scatter(y_test_pred, self.y_test, label='test samples')
        plt.xlabel('pred')
        plt.ylabel('observed')
        plt.legend()
        plt.grid(True)
        plt.show()

    def response_predict(self, test_data):
        """Predict the response for new predictor data.

        Args:
            test_data: Predictor values; passed to the feature transformer
                fitted in ``train``.

        Returns:
            Whatever ``self.model.predict`` returns for the expanded data.
        """
        poly_test_data = self.polynomial_features.transform(test_data)
        return self.model.predict(poly_test_data)

    def evaluate(self):
        """Print and return test-set metrics of the fitted model.

        Returns:
            A tuple ``(mean squared error, R^2 score, model coefficients)``.
        """
        y_pred = self._predict_test_set()
        # Compute each metric once and reuse it for printing and returning.
        mse = mean_squared_error(self.y_test, y_pred)
        r2 = r2_score(self.y_test, y_pred)

        print("Coefficients: \n", self.model.coef_)
        print("Mean squared error: %.2f" % mse)
        print("Coefficient of determination: %.2f" % r2)

        return mse, r2, self.model.coef_



# lre = PolynomialRegressor("data.csv", ["LeistungPS"], ["Beschleunigung"])
# lre.train()
# lre.analyse_loss()
# lre.evaluate()

