from tabulate import tabulate
import pandas as pd

# Columns read from each sheet of the workbook. `usecols` restricts the
# loaded data to exactly these columns.
headers = ["Year","Title","URL","Do the authors discuss types of errors? (yes/no)","Amenable?","Mentions errors?","Contains an error analysis?"]

# Open the workbook once and read both sheets from it. The context manager
# closes the underlying file handle as soon as both sheets are loaded,
# instead of leaving it open for the rest of the script.
with pd.ExcelFile('erroranalysis-papers.xlsx') as xls:
    df1 = pd.read_excel(xls, 'With Errors', usecols=headers)
    df2 = pd.read_excel(xls, 'Without Errors', usecols=headers)

# Stack the two sheets into one frame so every paper is visited exactly once.
combination = pd.concat([df1, df2])

# Conference year -> venue label used in the output table.
name = {
    2010: "INLG2010",
    2015: "ENLG2015",
    2020: "INLG2020",
}

# Per-year tallies, all starting at zero, plus the venue label for that year.
# Each year gets its own independent dict of counters.
index = {
    venue_year: {
        **dict.fromkeys(("Total", "Amenable", "Error mention", "Error analysis"), 0),
        "Venue": venue,
    }
    for venue_year, venue in name.items()
}

def _yes_no(row, column):
    """Return True if ``row[column]`` is "yes" and False if it is "no".

    The comparison is case-insensitive. Validation raises explicitly rather
    than using ``assert`` so that it still runs under ``python -O``.

    Raises:
        ValueError: if the cell holds anything other than "yes"/"no"
            (this includes empty cells and other non-string values).
    """
    answer = str(row[column]).lower()
    if answer not in ("yes", "no"):
        raise ValueError(
            f"Expected 'yes' or 'no' in column {column!r}, "
            f"got {row[column]!r} (paper: {row.get('Title')!r})"
        )
    return answer == "yes"


# Walk over every paper and update the per-year counters in `index`.
for row in combination.to_dict('records'):
    # Validate everything needed for this row before touching the counters.
    year = row["Year"]
    if year not in index:
        raise ValueError(
            f"Unexpected year {year!r} (paper: {row.get('Title')!r}); "
            f"expected one of {sorted(index)}"
        )
    amenable = _yes_no(row, "Amenable?")

    # Every paper counts towards the total for its year.
    index[year]["Total"] += 1
    if not amenable:
        # The error-related columns are only considered for amenable papers.
        continue
    index[year]["Amenable"] += 1

    # Validate both answers before counting either of them.
    mention = _yes_no(row, "Mentions errors?")
    analysis = _yes_no(row, "Contains an error analysis?")

    if mention:
        index[year]["Error mention"] += 1

    if analysis:
        index[year]["Error analysis"] += 1




# Build one table row per year, in the insertion order of `index`.
rows = []
headers = ["Venue", "Total", "Amenable", "Error mention", "Error analysis", "Percentage of amenable"]
for data in index.values():
    amenable_count = data["Amenable"]
    # Share of amenable papers that contain an error analysis. With no
    # amenable papers the percentage is undefined; store None (rendered via
    # `missingval` below) instead of raising ZeroDivisionError.
    data["Percentage of amenable"] = (
        (data["Error analysis"] / amenable_count) * 100 if amenable_count else None
    )
    row = [data[header] for header in headers]
    rows.append(row)

# Emit a LaTeX (booktabs) table; floats are rounded to whole numbers and
# undefined percentages are shown as "--".
table = tabulate(rows, headers=headers, tablefmt="latex_booktabs", floatfmt=".0f", missingval="--")
print(table)