from scipy import special

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import seaborn as sns

import os
import re

# sns.set_theme(style="darkgrid")
# tips = sns.load_dataset("tips")
# sns.relplot(x="total_bill", y="tip", data=tips).savefig("test.png")

# Directories holding the per-category content/function ratio tables.
csv_path = "content_function_nonproc"
csv_surf_path = "../lexstats_surface/content_function"

# Collect the table files in each directory, skipping any file whose name
# contains "_all".  os.listdir() returns entries in arbitrary order, so the
# listings are sorted to make the later `[0]` selection deterministic across
# machines and runs.
csvfiles = sorted(
    fname for fname in os.listdir(csv_path) if "_all" not in fname
)
csvsurfs = sorted(
    fname for fname in os.listdir(csv_surf_path) if "_all" not in fname
)

#print(csvfiles)
#print(categories)

# indices from 0 to 12

# Placeholder list; not referenced in the visible part of this script.
rank_lens = []

# X11 colour names available for plots; not referenced in the visible part
# of this script.
x11_colors = [
    'red', 'black', 'sienna', 'purple', 'darkorange',
    'gold', 'greenyellow', 'darkgreen', 'lightseagreen', 'grey',
    'blue', 'cyan', 'hotpink', 'silver', 'slateblue', 'darkseagreen',
    'sandybrown',
]

# One 7 x 5 inch figure holding a single Axes for the bar chart.
fig = plt.figure(figsize=(7, 5))
ax = fig.subplots()

def _read_ratio_table(directory, filename):
    """Read one CSV and name its first two columns "categories" and "ratio"."""
    table = pd.read_csv(os.path.join(directory, filename))
    first, second = table.columns[0], table.columns[1]
    return table.rename(columns={first: "categories", second: "ratio"})


# First table from each directory: `df` from csv_path, `df_s` from
# csv_surf_path (presumably CoDA vs. surface web -- confirm against the data).
df = _read_ratio_table(csv_path, csvfiles[0])
df_s = _read_ratio_table(csv_surf_path, csvsurfs[0])

# Bar labels and heights: all rows of `df` first, then all rows of `df_s`.
cat = df["categories"].tolist() + df_s["categories"].tolist()
ratio = df["ratio"].tolist() + df_s["ratio"].tolist()

# Colour each bar by its data source: green for rows that came from `df`,
# orange for rows that came from `df_s`.  The counts are taken from the
# frames themselves instead of a hard-coded 10 + 3, so the colours stay
# aligned with the data if either table gains or loses rows (a colour list
# whose length differs from the number of bars would not line up with the
# sources and matplotlib would not report it).
bar_colors = ['#7fc97f'] * len(df) + ['#fdc086'] * len(df_s)

# One bar per category, in the same order as `cat` / `ratio`.
ax.bar(cat, ratio, color=bar_colors, width=0.7)
plt.yscale('linear')

# No title is set on the figure; the category names are long, so the x tick
# labels are rotated and shrunk to keep them legible.
plt.xticks(rotation=75, fontsize=8)
plt.xlabel('Category')
plt.ylabel('Mean CF Ratio')
# Re-fit the layout so the rotated tick labels are not clipped in the output.
plt.tight_layout()

# Write the chart to disk; the output format follows the file extension.
plt.savefig('cf_ratio_surf.pdf')