#------------------------------------------------#
def get_meta(filename):
	"""Return the (register, country, feature) fields encoded in a filename.

	The name is split on "." and read positionally as
	register.language.country.samples.type.feature; anything after the
	sixth field (e.g. a file extension) is ignored.

	Args:
		filename: Dot-delimited file name (not a path).

	Returns:
		Tuple of strings (register, country, feature).

	Raises:
		IndexError: If the name has fewer than six dot-separated fields.
	"""
	fields = filename.split(".")

	# Positions 1, 3 and 4 (language, samples, type) are not used by callers,
	# so they are not bound to names here.
	register, country, feature = fields[0], fields[2], fields[5]

	return register, country, feature
	
#------------------------------------------------#
def get_vectors(language, type, register, feature):
	"""Load and stack the pickled feature matrices for one register/feature.

	Scans the directory ./vectors/<language>/<type>/ and keeps every file
	whose name, as parsed by get_meta(), carries the requested feature and
	either the requested register or any register when register == "all".

	Args:
		language: Sub-directory of ./vectors to read from.
		type: Sub-directory below the language directory. The name shadows
			the builtin but is kept so existing callers keep working.
		register: Register to keep, or "all" to keep every register.
		feature: Feature name to keep.

	Returns:
		Tuple (x, y_list). x is sparse.vstack() of every loaded matrix;
		y_list holds one country label per row of x, in the same order.

	Raises:
		ValueError: If no file in the directory matches the request.
		IndexError: Propagated from get_meta() for a file name with fewer
			than six dot-separated fields.
	"""
	# Imported here so the function also works when this module is imported;
	# at module level these names are only bound inside the script guard.
	import os
	import pickle
	from scipy import sparse

	x_list = []
	y_list = []

	current_path = os.path.join(".", "vectors", language, type)

	# os.listdir() returns entries in arbitrary order; sorting keeps the row
	# order of the stacked result reproducible across runs and machines.
	for filename in sorted(os.listdir(current_path)):
		current_register, current_country, current_feature = get_meta(filename)

		if current_feature != feature:
			continue
		if register != "all" and current_register != register:
			continue

		# NOTE(review): pickle.load can execute arbitrary code from the file;
		# only point this at vector files you produced yourself.
		with open(os.path.join(current_path, filename), "rb") as fo:
			matrix = pickle.load(fo)

		# Presumably a scipy sparse matrix with one row per sample -- only
		# .shape[0] and vstack-compatibility are relied on here.
		x_list.append(matrix)
		y_list += [current_country for _ in range(matrix.shape[0])]

	if not x_list:
		# Fail with a clear message instead of an opaque error from vstack.
		raise ValueError(
			"No vectors found in " + current_path
			+ " for register=" + str(register)
			+ ", feature=" + str(feature)
		)

	#Now stack
	x = sparse.vstack(x_list)

	return x, y_list
#------------------------------------------------#

if __name__ == "__main__":

	# Script entry point: for every feature set and training register, load
	# the saved model, score it on test vectors, and print a classification
	# report and a confusion matrix.
	#
	# The imports stay inside the guard, as in the original layout; they are
	# bound as module globals, so the functions above can see them when the
	# file is run as a script.
	import os
	import pickle
	import numpy as np
	from scipy import sparse
	from sklearn.svm import LinearSVC
	from sklearn.model_selection import GridSearchCV
	from sklearn.metrics import classification_report
	from sklearn.metrics import confusion_matrix
	
	cross_test = True
	language = "eng"
	features = ["unigrams", "bigrams", "function", "cxg1", "cxg2"]
	registers = ["cc", "twitter"]

	# When cross_test is on, a model is scored on the opposite register.
	# Registers missing from this table are tested on themselves.
	opposite_register = {"cc": "twitter", "twitter": "cc"}
	
	for feature in features:
		for train_register in registers:
	
			print("TRAIN: " + str(train_register))
			model_name = "model." + language + "." + train_register + "." + feature + ".p"
			model_path = os.path.join(".", "models", language, model_name)
			
			# NOTE(review): pickle.load can execute arbitrary code from the
			# file; only load model files from a trusted source.
			with open(model_path, "rb") as fo:
				model = pickle.load(fo)

			# Keep the test register in its own name rather than overwriting
			# the loop variable.
			if cross_test:
				test_register = opposite_register.get(train_register, train_register)
			else:
				test_register = train_register
			
			print("TEST: " + str(test_register))
			
			test_x, test_y = get_vectors(language, "test", test_register, feature)
			test_predictions = model.predict(test_x)
							
			current_report = classification_report(test_y, test_predictions, labels = None)
			current_matrix = confusion_matrix(test_y, test_predictions, labels = None)
				
			print("CROSS_TEST: " + str(cross_test))
			print(language, test_register, feature)
			print(current_report)
			print(current_matrix)