import os, re, sys, json, csv, string, gzip
import pandas as pd
import pickle as pkl 


def read_quotes(root_folder, mappers, novel, use_minor_char=True):
    """Load the quotes of one novel and translate their spans via ``mappers``.

    Reads ``quote_info.csv`` and ``charInfo.dict.pkl`` from
    ``<root_folder>/<novel>/``. For every CSV row the ``qSpan`` bounds are
    converted with ``get_offset_bytes`` and the resulting offsets are looked
    up in ``mappers``.

    Args:
        root_folder: Directory containing one sub-directory per novel.
        mappers: Lookup indexed by the offsets returned by
            ``get_offset_bytes``; ``mappers[start]`` and ``mappers[end - 1]``
            become the start and end values stored for the quote.
        novel: Name of the novel's sub-directory (also used in messages).
        use_minor_char: When false, rows whose ``speakerType`` is
            ``"minor"`` are left out of the result.

    Returns:
        A list of ``(qID, start, end, "CHAR_<speaker id>", qType)`` tuples,
        in CSV row order. Rows whose lookups raise ``KeyError`` are reported
        on stdout and skipped.
    """
    quoteinf = []

    novel_dir = os.path.join(root_folder, novel)
    quotedf = pd.read_csv(os.path.join(novel_dir, 'quote_info.csv'))
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on character-info files that come from a trusted source.
    # The context manager closes the file handle, which was previously leaked.
    with open(os.path.join(novel_dir, 'charInfo.dict.pkl'), 'rb') as fh:
        charInf = pkl.load(fh)

    for _, row in quotedf.iterrows():
        # NOTE(security): eval() runs whatever expression is stored in the
        # qSpan column; consider ast.literal_eval if the CSV may be untrusted.
        sb, eb = eval(row['qSpan'])
        asb, aeb = get_offset_bytes(row['qText'], sb, eb)
        qType = row['qType']
        qId = row['qID']

        # Only the lookups below are expected to fail with KeyError (offset
        # missing from mappers, unknown speaker name, missing column).
        try:
            st, et = mappers[asb], mappers[aeb - 1]
            speaker_id = charInf['name2id'][row['speaker']]
            etype = row["speakerType"]
        except KeyError:
            print("Error!!! Novel {} qID {}".format(novel, qId))
            continue

        # Drop minor characters only when the caller asked for it.
        if use_minor_char or etype != "minor":
            quoteinf.append((qId, st, et, "CHAR_" + str(speaker_id), qType))

    print("Read {}/{} quotes".format(len(quoteinf), len(quotedf)))
    return quoteinf

def group_quotes_by_speaker(root_folder, mappers, novel, use_minor_char=True) : 
    """Collect the quote ids and quote texts of one novel, grouped by speaker.

    Reads ``quote_info.csv`` and ``charInfo.dict.pkl`` from
    ``<root_folder>/<novel>/`` and fills ``char_dict`` with one entry per
    distinct value of the ``speaker`` column:
    ``char_dict[name2id[speaker]] = (list of qID, list of qText)``.

    Args:
        root_folder: Directory containing one sub-directory per novel.
        mappers: Not used in the visible part of this function.
        novel: Name of the novel's sub-directory.
        use_minor_char: When false, speakers whose first row has
            ``speakerType == "minor"`` are skipped.

    NOTE(review): no ``return`` statement is visible in this part of the
    file; if the function really ends here it returns ``None`` and
    ``char_dict`` is discarded -- confirm whether ``return char_dict`` is
    intended or present further down.
    """
    quotedf = pd.read_csv(os.path.join(root_folder, novel, 'quote_info.csv'))
    # NOTE(review): the file object passed to pkl.load is never closed
    # explicitly; pickle.load should also only be used on trusted files.
    charInf = pkl.load(open(os.path.join(root_folder, novel, 'charInfo.dict.pkl'), 'rb'))
    char_dict = {}
    for sname in quotedf["speaker"].unique(): 
        # The speaker's type is taken from the first row carrying that name.
        char_type = quotedf[quotedf["speaker"]==sname]["speakerType"].iloc[0]
        # Both operands are plain bools here, so `&` acts as a logical AND.
        if (char_type == "minor") & (not use_minor_char):
            pass 
        else : 
            # All rows spoken by this character, in CSV order.
            sub = quotedf[quotedf["speaker"]==sname]
            qids = sub["qID"].tolist()
            qtext = sub["qText"].tolist()
            # Keyed by the raw id from name2id (no "CHAR_" prefix is added here).
            eid = charInf['name2id'][sname]
            char_dict[eid] = (qids, qtext)
            