import CONFIG as C
from scipy.stats import pearsonr
import pandas as pd

# Script settings. Keys are looked up by name further down in this script.
CONFIG = {
	# Text file read line by line by the parsing loop below.
	"text_file": "1pl_diff_reduced_vs_2pl_discrim_clustered_features/output_diff.txt",
	# File name of the CSV written at the end; joined onto "working_dir".
	"out_file": "test_diff.csv",
	# Directory component of the output CSV path.
	"working_dir": "feature_importance_to_feature_direction",
}

# Feature table used for the correlations. It is indexed below by feature name
# and by "target", so it must expose both (presumably a DataFrame -- confirm
# against C.load_reduced_data).
features = C.load_reduced_data(file_name="all_features_fixed_manual.csv", targets=C.OriginalDataset.DFGN_148_reduced.onePL_var_irt_diff)

# The text file is consumed as consecutive 3-line records:
#   line 0 -> feature name
#   line 1 -> numeric score for that feature (stored as "ImportanceScore")
#   line 2 -> ignored
data = []   # one (name, pearson r, p-value, |r|) tuple per record
extra = []  # the numeric score of each record, in file order
with open(CONFIG["text_file"], "r") as f:
	for idx, raw_line in enumerate(f):
		# Strip only the line terminator. Slicing off the last character
		# unconditionally would corrupt the final line of a file that does
		# not end with a newline.
		line = raw_line.rstrip("\n")
		place = idx % 3  # position of this line inside its 3-line record
		if place == 0:
			feature_name = line
			# Debug output for one specific feature, kept from the original script.
			if feature_name == "ProportionPassiveVPs_answer_121":
				print(features[feature_name])
			statistic, p_value = pearsonr(features[feature_name], features["target"])
			data.append((feature_name, statistic, p_value, abs(statistic)))
		elif place == 1:
			extra.append(float(line))
		# place == 2: third line of the record carries nothing we need.

# A truncated file (feature name without its score line) would otherwise only
# surface as an opaque pandas length-mismatch error on column assignment.
if len(extra) != len(data):
	raise ValueError(
		f"{CONFIG['text_file']}: found {len(data)} feature names but {len(extra)} scores; "
		"the file does not consist of complete 3-line records"
	)

data_frame = pd.DataFrame(columns=["FeatureName", "PearsonScore", "PValue", "PearsonScoreABS"], data=data)
data_frame["ImportanceScore"] = extra
print(data_frame["ImportanceScore"])
print(data_frame)
# Ascending sort: features with the weakest absolute correlation come first.
data_frame.sort_values(by="PearsonScoreABS", inplace=True)
data_frame.to_csv(f"{CONFIG['working_dir']}/{CONFIG['out_file']}", index=False)
