import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import sqlite3
from typing import List, Dict, Set, Iterator, Tuple
from scipy.stats import ttest_ind
import itertools
from RegressionTest import Regressiontest
from tabularDataImporter import TabularDataImporter
from dnn_regression import DNNRegressor
from linear_regression import LinearRegressor
from nn_linear_regression import NN1Regressor
from polynomial_regression import PolynomialRegressor



class Data:

    def __init__(self, dbfile:str, csv1:str, csv2:str, filter_unit=True, language="us"):
        """Load the study tables, the item/distractor CSVs and the technical data.

        Args:
            dbfile: Path of the SQLite database containing the ``evalAdv_*`` tables.
            csv1: CSV file read into ``self.expectationFrame``.
            csv2: CSV file read into ``self.DistractorFrame``.
            filter_unit: When true, the session, answer and combined tables are
                restricted to rows whose ``unit_type`` equals ``language``.
            language: ``unit_type`` value kept when ``filter_unit`` is true.

        Raises:
            KeyError: If one of the expected ``evalAdv_*`` tables is missing.
        """
        # Pull every table into memory, then release the connection even if a
        # query fails; nothing below needs the database any more.
        dbcon: sqlite3.Connection = sqlite3.connect(dbfile)
        try:
            tables: List[str] = list(pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", dbcon)['name'])
            # Quote the identifier so table names with spaces or keywords still load.
            tabmap: Dict[str, pd.DataFrame] = {tbl: pd.read_sql_query(f'SELECT * FROM "{tbl}"', dbcon) for tbl in tables}
        finally:
            dbcon.close()

        def for_unit(table: str) -> pd.DataFrame:
            """Return ``table``, restricted to the requested unit type if filtering is on."""
            frame = tabmap[table]
            if not filter_unit:
                return frame
            # Copy so later in-place edits (e.g. dropping rows) act on an
            # independent frame rather than on a slice of the loaded table.
            return frame.loc[frame["unit_type"] == language].copy()

        # These two tables are never filtered by unit type.
        self.expectation_item: pd.DataFrame = tabmap["evalAdv_expectationitem"]
        self.distractor_item: pd.DataFrame = tabmap["evalAdv_distractoritem"]
        self.session_item: pd.DataFrame = for_unit("evalAdv_sessionitem")
        self.answer_item: pd.DataFrame = for_unit("evalAdv_answeritem")
        self.combined_item: pd.DataFrame = for_unit("evalAdv_combinedsessionitem")

        if filter_unit:
            print("filtered data:")
            print(self.session_item["unit_type"].unique())

        # NOTE: the attribute really is spelled ``DistractorFrame`` (capital D);
        # other methods of this class read it under that name.
        self.expectationFrame: pd.DataFrame
        self.DistractorFrame: pd.DataFrame
        self.expectationFrame, self.DistractorFrame = self.loadcsvItems(csv1, csv2)

        self.technical_data = TabularDataImporter("data.csv")
        self.technical_data.preprocess_all()

    def loadcsvItems(self, csv1:str, csv2:str) -> Tuple[pd.DataFrame, pd.DataFrame]:
        items: pd.DataFrame = pd.read_csv(csv1)
        dists: pd.DataFrame = pd.read_csv(csv2)
        return (items, dists)


    def spot_invalid_outliers(self):
        pd.set_option('display.max_columns', None)
        pd.set_option('display.max_rows', None)
        
        counter = 0
        for index, row in self.session_item.iterrows():
            outlier = self.expectation_item.loc[self.expectation_item["item_id"]==row["item_id_id"]]
            if row["upper_limes"] > outlier["max_val"].item():
                counter +=1
                #print(row)
        print(counter, "invalid outliers where input is not within permitted input range (illegal operation)")




    def plot_car_possession(self):
        """Print and plot a histogram of the answers given to question id 1.

        The figure is titled "car ownership"; the histogram is drawn over the
        ``answer_string`` column of the matching ``self.answer_item`` rows.
        """
        answers = self.answer_item
        ownership_answers: pd.DataFrame = answers[answers["question_id_id"] == 1]
        print(ownership_answers)

        figure, _ = plt.subplots(1, 1, figsize=(18, 10))
        figure.suptitle("car ownership")

        sns.histplot(data=ownership_answers, x="answer_string")
        plt.show()
        
    def plot_distractor_generic(self):
        """Show one answer histogram per question listed in ``self.DistractorFrame``.

        Each figure is titled with the question's ``question_text_us`` and
        plots the ``answer_string`` values of the matching answer rows.
        """
        distractors = self.DistractorFrame

        for question_id in distractors["question_id"]:
            print(question_id, type(question_id))

            answers: pd.DataFrame = self.answer_item.loc[self.answer_item["question_id_id"] == question_id]
            # .item() requires exactly one definition row for this question id.
            question_text: str = distractors.loc[distractors["question_id"] == question_id, "question_text_us"].item()
            print(question_text, type(question_text))

            figure, _ = plt.subplots(1, 1, figsize=(18, 10))
            figure.suptitle(question_text)

            sns.histplot(data=answers, x="answer_string")
            plt.show()


    def filter_unmodified_items(self):
        pass


        for idn in self.expectationFrame["item_id"]:
            lower_def:float = self.expectationFrame.loc[self.expectationFrame["item_id"] == idn, "lower_def"].item()
            upper_def:float = self.expectationFrame.loc[self.expectationFrame["item_id"] == idn, "upper_def"].item()
            print(idn, lower_def, upper_def)

            self.session_item.drop(self.session_item[(self.session_item["lower_limes"] == lower_def) & (self.session_item["upper_limes"] == upper_def) & (self.session_item["item_id_id"] == idn)].index, inplace=True)
        

    def plot_items_generic(self):
        """Show a horizontal box plot of the lower/upper limits for every item.

        Items are taken from ``self.expectationFrame``; each figure is titled
        with the item's ``expression_us`` text.
        """
        definitions = self.expectationFrame
        limit_columns = ["lower_limes", "upper_limes"]

        for item_id in definitions["item_id"]:
            item_sessions: pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"] == item_id]
            # .item() requires exactly one definition row for this item id.
            title: str = definitions.loc[definitions["item_id"] == item_id, "expression_us"].item()
            print(title, type(title))

            figure, _ = plt.subplots(1, 1, figsize=(18, 10))
            figure.suptitle(title)

            print(item_sessions[limit_columns])

            sns.boxplot(data=item_sessions[limit_columns], orient="h")
            plt.show()


    def plot_items_generic_grouped(self):
        """Show, per item group, stacked histograms of the lower and upper limits.

        One figure is produced for each distinct ``group`` in
        ``self.expectationFrame``. Inside a figure there is one row per item of
        the group, ordered by ``polarity`` and titled with ``expression_us``.
        """
        for group_idn in self.expectationFrame["group"].unique():
            this_group: pd.DataFrame = self.expectationFrame.loc[
                self.expectationFrame["group"] == group_idn,
                ["item_id", "polarity", "expression_us"],
            ]
            sorted_group: pd.DataFrame = this_group.sort_values(by=["polarity"])

            # squeeze=False always yields a 2-D axes array; without it a group
            # with a single item gets a bare Axes object that cannot be indexed.
            _, axes = plt.subplots(len(sorted_group.index), 1, figsize=(18, 10), squeeze=False)

            for axx, (_, row) in zip(axes[:, 0], sorted_group.iterrows()):
                this_data: pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"] == row["item_id"]]

                this_task: str = row["expression_us"]
                print(this_task, type(this_task))

                axx.set_ylim(1, 50)
                axx.set_title(this_task)
                axx.set_xlabel("target feature")
                # Both limits are overlaid on the same axes.
                sns.histplot(data=this_data["lower_limes"], ax=axx, kde=True, bins=10)
                sns.histplot(data=this_data["upper_limes"], ax=axx, kde=True, bins=10)
            plt.show()


    def significance_grouped(self):
        
        for group_idn in self.expectationFrame["group"].unique():
            group_idn:str = group_idn
            #print(group_idn)

            
            
            this_group:pd.DataFrame = self.expectationFrame.loc[self.expectationFrame["group"] == group_idn, ["item_id", "polarity", "expression"]]




            #print(this_group)
            sorted_group:pd.DataFrame = this_group.sort_values(by=["polarity"])
            #print(sorted_group)
            row_num:int = len(sorted_group.index)

            fig, axes = plt.subplots(row_num, 1, figsize=(18, 10))
            
            data_dict = {}
            pos = 0
            for index, row in sorted_group.iterrows():
                #print(row["item_id"])
                this_data:pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"] == row["item_id"]]
                #print(this_data)

                data_dict[index] = (this_data, row["polarity"], row["expression"])
            
            
            print("significance test: group ", group_idn)
            combis = itertools.combinations(list(data_dict.keys()), 2) 
            for t in combis:

                i,j = t

                ttest_var_lower = ttest_ind(data_dict[i][0]["lower_limes"], data_dict[j][0]["lower_limes"])
                ttest_var_upper = ttest_ind(data_dict[i][0]["upper_limes"], data_dict[j][0]["upper_limes"])

                print("\t polarities:", data_dict[i][1], " vs. ", data_dict[j][1])
                print("\t expressions: ")
                print("\t", data_dict[i][1], "-->",  data_dict[i][2])
                print("\t", data_dict[j][1], "-->",  data_dict[j][2])
                
                print("\t lower border; value: ", ttest_var_lower[0], "; residual probability: ", ttest_var_lower[1])
                print("\t upper border; value:", ttest_var_upper[0], "; residual probability: ", ttest_var_upper[1])
                print("\t ##############")


    def plot_items_combined(self):
        """Show three stacked histograms of the combined session limits.

        Row ``k`` (1..3) overlays ``lower_limes_k`` and ``upper_limes_k`` from
        ``self.combined_item``.
        """
        combined: pd.DataFrame = self.combined_item

        _, axes = plt.subplots(3, 1, figsize=(18, 10))

        for slot, axx in enumerate(axes, start=1):
            axx.set_ylim(1, 50)
            for bound in ("lower", "upper"):
                sns.histplot(data=combined[f"{bound}_limes_{slot}"], ax=axx, kde=True, bins=15)
        plt.show()


    def significance_items_combined(self):
        
    

    
            this_data:pd.DataFrame = self.combined_item
            #print(this_data)

            
            
            ttest_var_lower_1 = ttest_ind(this_data["lower_limes_1"], this_data["lower_limes_2"])
            ttest_var_lower_2 = ttest_ind(this_data["lower_limes_2"], this_data["lower_limes_3"])
            ttest_var_lower_3 = ttest_ind(this_data["lower_limes_1"], this_data["lower_limes_3"])
            


            ttest_var_upper_1 = ttest_ind(this_data["upper_limes_1"], this_data["upper_limes_2"])
            ttest_var_upper_2 = ttest_ind(this_data["upper_limes_2"], this_data["upper_limes_3"])
            ttest_var_upper_3 = ttest_ind(this_data["upper_limes_1"], this_data["upper_limes_3"])

            print(ttest_var_lower_1)
            print(ttest_var_lower_2)
            print(ttest_var_lower_3)
            print(ttest_var_upper_1)
            print(ttest_var_upper_2)
            print(ttest_var_upper_3)





    def significance_crosstest(self):
        
    

            this_group:pd.DataFrame = self.expectationFrame.loc[self.expectationFrame["group"] == "c", ["item_id", "polarity", "expression"]]
 
            combined_data:pd.DataFrame = self.combined_item

            print(this_group.columns)
            print(combined_data.columns)
            #print(this_data)

            neg_id = this_group.loc[this_group["polarity"] == -2]["item_id"].item()
            pos_id = this_group.loc[this_group["polarity"] == 2]["item_id"].item()
            neut_id = this_group.loc[this_group["polarity"] == 0]["item_id"].item()
            
            neg_set = self.session_item.loc[self.session_item["item_id_id"] == neg_id]
            neut_set = self.session_item.loc[self.session_item["item_id_id"] == neut_id]
            pos_set = self.session_item.loc[self.session_item["item_id_id"] == pos_id]
            
           


            ttest_var_lower_neg = ttest_ind(combined_data["lower_limes_1"], neg_set["lower_limes"])
            ttest_var_upper_neg = ttest_ind(combined_data["upper_limes_1"], neg_set["upper_limes"])
            
            ttest_var_lower_neut = ttest_ind(combined_data["lower_limes_2"], neut_set["lower_limes"])
            ttest_var_upper_neut = ttest_ind(combined_data["upper_limes_2"], neut_set["upper_limes"])
            
            ttest_var_lower_pos = ttest_ind(combined_data["lower_limes_3"], pos_set["lower_limes"])
            ttest_var_upper_pos = ttest_ind(combined_data["upper_limes_3"], pos_set["upper_limes"])
            
            print(ttest_var_lower_neg)
            print(ttest_var_upper_neg)
            print(ttest_var_lower_neut)
            print(ttest_var_upper_neut)
            print(ttest_var_lower_pos)
            print(ttest_var_upper_pos)




    def plot_items_bellcurve(self):
        """Show, per item group, the distribution of limits split by polarity.

        For every distinct ``group`` in ``self.expectationFrame`` the session
        rows of the group's items are merged with the item definitions, melted
        into long form (one row per limit value) and drawn with
        ``seaborn.displot``: one column per limit kind, one hue per polarity.
        The x-axis label is derived from the ``target_feature`` and
        ``source_feature`` of the first matching expectation row.
        """
        # English axis labels for the feature names stored in the data.
        en_lookup = {"LeistungPS": "power (hp)",
                     "Hoechstgeschwindigkeit": "maximum speed (km/h)",
                     "Beschleunigung": "acceleration (sec.)",
                     "VerbrauchGesamt": "mileage (l/100 km)",
                     "Hubraum": "displacement (litres)"}
        lss = [':', '--', '-.']
        limit_columns = ["lower_limes", "upper_limes"]

        for group_idn in self.expectationFrame["group"].unique():
            this_group: pd.DataFrame = self.expectationFrame.loc[
                self.expectationFrame["group"] == group_idn,
                ["item_id", "polarity", "expression"],
            ]
            sorted_group: pd.DataFrame = this_group.sort_values(by=["polarity"])

            this_data: pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"].isin(sorted_group["item_id"])]
            expecdata: pd.DataFrame = self.expectation_item.loc[self.expectation_item["item_id"].isin(sorted_group["item_id"])]
            # The first matching row is taken as representative for the group.
            feature_x = expecdata["target_feature"].tolist()[0]
            feature_given = expecdata["source_feature"].tolist()[0]

            merged: pd.DataFrame = this_data.merge(sorted_group, left_on=["item_id_id"], right_on=["item_id"])

            meltdf = pd.melt(
                merged,
                id_vars=[x for x in list(merged.columns) if x not in limit_columns],
                value_vars=limit_columns,
                var_name="limes",
                value_name="limes_value",
            )

            print(merged)

            g = sns.displot(data=meltdf, x="limes_value", hue="polarity", col="limes", kde=True, element="step", palette=sns.diverging_palette(250, 30, l=65, center="dark").as_hex())
            # Fall back to the raw feature name when no English label is known.
            x_name = en_lookup.get(feature_x, feature_x)
            given_name = en_lookup.get(feature_given, feature_given)
            g.set(xlabel=f"{x_name} (given {given_name})", ylabel="")

            for ax in g.axes.flat:
                for line, ls in zip(ax.lines, lss):
                    line.set_linestyle(ls)
                    line.set_linewidth(3)

            legend = g._legend
            if legend is not None:
                # ``legendHandles`` was renamed to ``legend_handles`` in newer
                # matplotlib releases; support both spellings.
                handles = getattr(legend, "legend_handles", None)
                if handles is None:
                    handles = legend.legendHandles
                # Legend entries are reversed to line up with the axes' line order.
                for handle, ls in zip(handles[::-1], lss):
                    handle.set_ls(ls)

            plt.show()

    def plot_items_combined_bellcurve(self):
        """Show the distribution of the combined limits split by polarity.

        Every row of ``self.combined_item`` is expanded into three rows, one per
        limit pair ``*_limes_1..3``, tagged with polarity -2, 0 and 2
        respectively. The result is melted into long form and drawn with
        ``seaborn.displot``: one column per limit kind, one hue per polarity.
        """
        this_data: pd.DataFrame = self.combined_item
        columns_remaining = ["id", "prolific_id", "unit_type", "participant_id"]
        pols = [-2, 0, 2]
        print(list(this_data.columns))

        # Wide -> long: limit pair k of each source row carries polarity pols[k-1].
        long_rows = [
            {
                **{c: row[c] for c in columns_remaining},
                "upper_limes": row[f"upper_limes_{slot}"],
                "lower_limes": row[f"lower_limes_{slot}"],
                "polarity": polarity,
            }
            for _, row in this_data.iterrows()
            for slot, polarity in enumerate(pols, start=1)
        ]
        # Naming the columns keeps the frame well-formed for an empty input.
        dictdf: pd.DataFrame = pd.DataFrame(long_rows, columns=columns_remaining + ["upper_limes", "lower_limes", "polarity"])

        for key in dictdf.columns:
            print(key, len(dictdf[key]))

        meltdf = pd.melt(dictdf, id_vars=["id", "polarity", "participant_id", "prolific_id", "unit_type"], value_vars=["lower_limes", "upper_limes"], var_name="limes", value_name="limes_value")
        print(meltdf)

        g = sns.displot(data=meltdf, x="limes_value", hue="polarity", col="limes", kde=True, hue_order=[-2, 0, 2], element="step", palette=sns.diverging_palette(250, 30, l=65, center="dark").as_hex())
        g.set(xlabel="Maximum speed", ylabel="")
        g.set(xlim=(0, 500))
        lss = [':', '--', '-.']

        for ax in g.axes.flat:
            for line, ls in zip(ax.lines, lss):
                line.set_linestyle(ls)
                line.set_linewidth(3)

        legend = g._legend
        if legend is not None:
            # ``legendHandles`` was renamed to ``legend_handles`` in newer
            # matplotlib releases; support both spellings.
            handles = getattr(legend, "legend_handles", None)
            if handles is None:
                handles = legend.legendHandles
            # Legend entries are reversed to line up with the axes' line order.
            for handle, ls in zip(handles[::-1], lss):
                handle.set_ls(ls)

        plt.show()


    def estimation_data_match(self):
        """Plot participant estimates against the matching slice of technical data.

        For each item group "a".."e" the technical data is restricted to rows
        whose given feature lies inside a fixed window. Two figures are then
        shown per group (lower limit, upper limit): a histogram of the
        predicted feature in that slice on top, and a per-polarity KDE of the
        participants' limits below, sharing the x-axis.
        """

        def create_hisplot_boxplot(study_data, tech_data, xh, xb):
            """Draw one figure: technical histogram of ``xb`` above a KDE of ``xh``."""
            sns.set(style="darkgrid")

            # Two stacked axes sharing x: a thin technical-data panel on top of
            # the larger panel with the study estimates.
            _, (ax_tech, ax_estimates) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)})

            sns.histplot(data=tech_data, x=xb, ax=ax_tech)
            sns.kdeplot(data=study_data, hue="polarity", x=xh, ax=ax_estimates)

            # Only the bottom panel carries the x-axis label.
            ax_tech.set(xlabel='')
            ax_estimates.set(xlabel=xb)
            plt.show()

        # NOTE: these change pandas' display options globally, for the whole process.
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)

        # (group, given feature, window lower bound, window upper bound, predicted feature)
        group_specs = (
            ("a", "Hoechstgeschwindigkeit", 195, 205, "Beschleunigung"),    # given speed (200), predict accel
            ("b", "Hubraum", 3000, 4000, "LeistungPS"),                     # given displacement (3.5), predict power
            ("c", "LeistungPS", 345, 355, "Hoechstgeschwindigkeit"),        # given power (350), predict speed
            ("d", "LeistungPS", 245, 255, "VerbrauchGesamt"),               # given power (250), predict mileage
            ("e", "LeistungPS", 195, 205, "Beschleunigung"),                # given power (200), predict accel
        )

        for group, given, lower_bound, upper_bound, predicted in group_specs:
            tech = self.technical_data.dataFrame
            technical_slice = tech.loc[(tech[given] >= lower_bound) & (tech[given] <= upper_bound)].reset_index()

            group_items: pd.DataFrame = self.expectationFrame.loc[
                self.expectationFrame["group"] == group,
                ["item_id", "polarity", "expression"],
            ]
            group_sessions: pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"].isin(group_items["item_id"])]
            # Attach polarity/expression to every session row of the group.
            study_data = group_sessions.merge(group_items, left_on="item_id_id", right_on="item_id")

            for bound in ("lower_limes", "upper_limes"):
                create_hisplot_boxplot(study_data, technical_slice, bound, predicted)
        
        


    def regression_data_estimation_match(self):
        """Plot participant estimates against technical data and regression predictions.

        For each item group "a".."e" a ``Regressiontest`` is built on
        "data.csv" and asked for a prediction at one fixed value of the given
        feature. Two figures are then shown per group (lower limit, upper
        limit) with three stacked panels sharing the x-axis: the predictions
        (plus/minus ``self.technical_data.get_std`` of the predicted feature),
        a histogram of the predicted feature in the matching slice of the
        technical data, and a per-polarity KDE of the participants' limits.
        """
        # Short English names used for the x-axis label.
        feature_names = {
            "Hoechstgeschwindigkeit": "maximum speed",
            "Beschleunigung": "acceleration",
            "LeistungPS": "power",
            "VerbrauchGesamt": "mileage",
            "Hubraum": "displacement",
        }

        def create_hisplot_boxplot(study_data, tech_data, xh, xb, regression_results):
            """Draw one three-panel figure for limit column ``xh`` and feature ``xb``."""
            sns.set(style="darkgrid")

            _, (ax_scatter, ax_tech, ax_estimates) = plt.subplots(3, sharex=True, gridspec_kw={"height_ratios": (0.15, 0.15, 0.85)})

            # The spread depends only on the feature, so look it up once.
            spread = self.technical_data.get_std(xb)
            predictions = list(regression_results.values())

            regrdf = pd.DataFrame(predictions, columns=[xb])
            regrdf1 = pd.DataFrame([p + spread for p in predictions], columns=[xb])
            regrdf2 = pd.DataFrame([p - spread for p in predictions], columns=[xb])

            print(regrdf)
            sns.stripplot(data=regrdf, x=xb, ax=ax_scatter, color="lightblue", marker="*", s=8)
            sns.stripplot(data=regrdf1, x=xb, ax=ax_scatter, color="blue", marker="^", s=8)
            sns.stripplot(data=regrdf2, x=xb, ax=ax_scatter, color="blue", marker="v", s=8)

            sns.histplot(data=tech_data, x=xb, ax=ax_tech)
            sns.kdeplot(data=study_data, hue="polarity", x=xh, ax=ax_estimates)

            # Only the bottom panel carries the x-axis label; it names the
            # feature and limit actually plotted instead of a fixed text.
            ax_tech.set(xlabel='')
            bound_name = "upper" if xh == "upper_limes" else "lower"
            ax_estimates.set(xlabel=f"{feature_names.get(xb, xb)} ({bound_name} threshold)")
            plt.show()

        # NOTE: these change pandas' display options globally, for the whole process.
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)

        # (group, given feature, prediction point, window lower bound,
        #  window upper bound, predicted feature)
        # NOTE(review): group "b" predicts at Hubraum = 3.5 while the technical
        # data window is 3000..4000 -- the units look inconsistent; confirm
        # which scale Regressiontest expects.
        group_specs = (
            ("a", "Hoechstgeschwindigkeit", 200, 195, 205, "Beschleunigung"),   # given speed, predict accel
            ("b", "Hubraum", 3.5, 3000, 4000, "LeistungPS"),                    # given displacement, predict power
            ("c", "LeistungPS", 350, 345, 355, "Hoechstgeschwindigkeit"),       # given power, predict speed
            ("d", "LeistungPS", 250, 245, 255, "VerbrauchGesamt"),              # given power, predict mileage
            ("e", "LeistungPS", 200, 195, 205, "Beschleunigung"),               # given power, predict accel
        )

        for group, given, point, lower_bound, upper_bound, predicted in group_specs:
            regression = Regressiontest([DNNRegressor, PolynomialRegressor, LinearRegressor], "data.csv", [given], [predicted])
            regres = regression.predict_single_point([(given, point)])

            tech = self.technical_data.dataFrame
            technical_slice = tech.loc[(tech[given] >= lower_bound) & (tech[given] <= upper_bound)].reset_index()

            group_items: pd.DataFrame = self.expectationFrame.loc[
                self.expectationFrame["group"] == group,
                ["item_id", "polarity", "expression"],
            ]
            group_sessions: pd.DataFrame = self.session_item.loc[self.session_item["item_id_id"].isin(group_items["item_id"])]
            # Attach polarity/expression to every session row of the group.
            study_data = group_sessions.merge(group_items, left_on="item_id_id", right_on="item_id")

            for bound in ("lower_limes", "upper_limes"):
                create_hisplot_boxplot(study_data, technical_slice, bound, predicted, regres)
        



def main() -> None:
    """Run the full evaluation: filter the sessions, then every plot and test."""
    d = Data("studyEvalAdv_US_50.sqlite3", "items.csv", "distractors.csv", language="metric")
    # Filtering comes first so every later step sees the reduced session table.
    d.filter_unmodified_items()
    d.plot_car_possession()
    d.plot_distractor_generic()
    d.plot_items_generic_grouped()
    d.significance_grouped()
    d.plot_items_combined()
    d.significance_items_combined()
    d.plot_items_bellcurve()
    d.plot_items_combined_bellcurve()
    d.significance_crosstest()
    d.regression_data_estimation_match()


# Guarded so that importing this module (e.g. to reuse ``Data``) does not
# open the database and pop up plot windows.
if __name__ == "__main__":
    main()