# -*- encoding: utf-8 -*-

# Simple grammar (No-Wordorder)


#argv[1] == hashToContext (number context)
#argv[2] == meanings (kicks_ball(purple11))
#argv[3] == vocabulary
#argv[4] == grammar_name

import re
import codecs

# Patterns used to classify a meaning representation by the number of arguments it has.
# Each one must match the whole string (anchored with ^ and $).

# m0: a string containing no "(" at all; group 1 is the whole string.
m0 = re.compile(r"^([^(]+)$")
# m1: name(arg); group 1 is the text before "(", group 2 the single argument (it may not contain a comma).
m1 = re.compile(r"^([^(]+)\(([^,]+)\)$")
# m2: name(arg1,arg2); group 1 is the text before "(", group 2 the text up to the first comma,
# group 3 the remaining text before the closing ")" that ends the string.
m2 = re.compile(r"^([^(]+)\(([^,]+),([^)]+)\)$")


import sys

def chomp(line):
	"""Return `line` with every trailing carriage-return / line-feed character removed."""
	lineBreakChars = '\r\n'
	stripped = line.rstrip(lineBreakChars)
	return stripped

def clean(mr):
	"""Flatten a meaning representation into a plain symbol name.

	"(" and "," each become "_" and ")" is dropped, e.g. "kicks(a,b)" -> "kicks_a_b".
	"""
	substitutions = (("(", "_"), (",", "_"), (")", ""))
	for old, new in substitutions:
		mr = mr.replace(old, new)
	return mr
	
class PCFGRule(object):
	"""A single production ``lhs --> sym1 sym2 ...``.

	Rules compare and hash by their textual form only, so two rules with the
	same left-hand side and the same right-hand-side symbols are treated as
	the same rule even when alpha, a or b differ.
	"""

	def __init__(self, lhs, rhs, alpha=0.1, a=1, b=100):
		"""Create a rule.

		lhs   -- left-hand-side symbol
		rhs   -- right-hand side as a single whitespace-separated string
		alpha -- a dirichlet parameter, standardly sparse
		a     -- a=1 --> not adapted
		b     -- the bigger b is, the more probable is the generation of a
		         new tree for an adapted NT
		"""
		self._lhs = lhs
		# stored as a list of symbols; runs of whitespace in `rhs` collapse
		self._rhs = rhs.split()
		self._a = a
		self._b = b
		self._alpha = alpha

	def __repr__(self):
		"""Return the rule as ``lhs --> sym1 sym2 ...`` (parameters are not included)."""
		return self._lhs+" --> "+" ".join(self._rhs)

	def __eq__(self, other):
		# __repr__ is called directly rather than through repr(): under
		# Python 2, repr() coerces a unicode result to a byte string.
		return self.__repr__() == other.__repr__()

	def __ne__(self, other):
		# Python 2 does not derive != from ==, so it has to be spelled out
		# to keep the two operators consistent.
		return not self.__eq__(other)

	def __hash__(self):
		# must agree with __eq__, hence also based on the textual form
		return hash(self.__repr__())

class Grammar(object):
	"""A set of rules grouped by their left-hand-side symbol."""

	def __init__(self):
		# lhs symbol -> list of rules in the order they were added
		# (duplicates are kept here and only removed when saving)
		self._rules = {}

	def addRule(self, rule):
		"""Append `rule` to the rules stored under its `_lhs`."""
		self._rules.setdefault(rule._lhs,[]).append(rule)

	def getRules(self, lhs):
		"""Return the list of rules added for `lhs`; raises KeyError if there are none."""
		return self._rules[lhs]

	def __getitem__(self, key):
		"""``grammar[lhs]`` is shorthand for ``grammar.getRules(lhs)``."""
		return self.getRules(key)

	def save(self, dest):
		"""Write the grammar to the file `dest` (UTF-8), one rule per line.

		Left-hand sides are written in sorted order. Within one left-hand
		side, rules appear in the order they were first added and each
		distinct rule is written once.
		"""
		# The with-block closes the file even if a write fails.
		with codecs.open(dest, "w", encoding='utf-8') as output:
			# sorted() works on the dict directly on both Python 2 and 3;
			# dict.keys() has no .sort() on Python 3.
			for lhs in sorted(self._rules):
				alreadyWritten = set()
				for rule in self._rules[lhs]:
					if rule in alreadyWritten:
						continue
					alreadyWritten.add(rule)
					# __repr__ is called directly: under Python 2, repr()
					# would coerce a unicode result to a byte string.
					output.write(rule.__repr__()+"\n")

		

		
def _constituentOrders(meaningRepr):
	"""Return the constituent orderings generated for one meaning.

	Each ordering is a tuple of concept names. A meaning that matches none
	of the patterns m0, m1, m2 yields an empty list.
	"""
	t = m0.match(meaningRepr)
	if t:		#0-ary predicate
		return [(t.group(1),)]
	t = m1.match(meaningRepr)
	if t:		#1-ary predicate: the SV and VS orders
		pred, arg1 = t.group(1), t.group(2)
		return [(arg1, pred), (pred, arg1)]
	t = m2.match(meaningRepr)
	if t:		#2-ary predicate: all six orders
		pred, arg1, arg2 = t.group(1), t.group(2), t.group(3)
		return [
			(arg1, pred, arg2),	#SVO
			(arg1, arg2, pred),	#SOV
			(arg2, arg1, pred),	#OSV
			(arg2, pred, arg1),	#OVS
			(pred, arg1, arg2),	#VSO
			(pred, arg2, arg1),	#VOS
		]
	return []


def createGrammar(contextMap, meanings, primitives, grammarName, words=None):
	"""Build the word-order-free grammar and save it to the file `grammarName`.

	contextMap  -- dict: context key -> ";"-separated string of the meanings
	               in that context; each key becomes the terminal of a Q_<i> rule
	meanings    -- iterable of meaning representations, e.g. "kicks(a,b)"
	primitives  -- iterable of concept names; each gets its own Phrase_/Ph_/PhX_/Word_ rules
	grammarName -- path of the output file
	words       -- iterable of vocabulary items. When omitted, the module-level
	               name `words` is used (as the original four-argument call
	               relies on); NameError is raised if that does not exist either.

	A meaning only gets S_ expansions for the contexts whose meaning list
	contains it, and only if it matches one of the patterns m0, m1, m2.
	"""
	if words is None:
		# Backward compatibility: four-argument callers publish the
		# vocabulary as a module-level `words`.
		if "words" not in globals():
			raise NameError("createGrammar: no vocabulary given; pass `words` or define a module-level `words`")
		words = globals()["words"]
	# materialise once: the vocabulary is iterated once per primitive below
	words = list(words)

	KG = Grammar()
	prior = 0.1

	#introduce NTs for Contexts
	contexts = list(contextMap.keys())
	getContextNT = {}
	for number, context in enumerate(contexts):
		contextNT = "Q_"+str(number)
		KG.addRule(PCFGRule(contextNT,context,prior))	#actually, these are hard-coded - never mind, that's learned anyway
		getContextNT[context] = contextNT

	#split every context's meaning list once instead of once per meaning
	meaningsOf = {}
	for context in contexts:
		meaningsOf[context] = set(contextMap[context].split(";"))

	#META-LEVEL

	#Non-Referentials
	KG.addRule(PCFGRule("AROOT","S_none"))
	for context in contexts:
		KG.addRule(PCFGRule("S_none",getContextNT[context]+" Phrase_none"))

	KG.addRule(PCFGRule("Phrase_none","Word_none"))
	KG.addRule(PCFGRule("Phrase_none","Phrase_none Word_none"))

	#generate the S-Rules, taking care of contexts and arity
	for meaningRepr in meanings:
		sentenceNT = "S_"+clean(meaningRepr)
		KG.addRule(PCFGRule("AROOT",sentenceNT,prior))
		orders = _constituentOrders(meaningRepr)
		if not orders:
			continue
		for context in contexts:
			if meaningRepr not in meaningsOf[context]:
				continue
			for order in orders:
				rhs = getContextNT[context]+"".join([" Phrase_"+concept for concept in order])
				KG.addRule(PCFGRule(sentenceNT,rhs,prior))

	#Introduce ConceptPhrases
	#Phrase_x --> Top-Level; every expansion contains at least one Word_x
	#Ph_x --> Recursive level, no Word_x generated yet
	#PhX_x --> Recursive level, Word_x has been generated, so it may also end in Word_none
	for primitive in primitives:
		phrase = "Phrase_"+primitive
		ph = "Ph_"+primitive
		phX = "PhX_"+primitive
		wordNT = "Word_"+primitive
		conceptRules = (
			(phrase, wordNT),
			(phrase, phX+" "+wordNT),
			(phrase, ph+" Word_none"),
			(ph, phX+" "+wordNT),
			(ph, ph+" Word_none"),
			(ph, wordNT),
			(phX, phX+" Word_none"),
			(phX, phX+" "+wordNT),
			(phX, wordNT),
			(phX, "Word_none"),
		)
		for lhs, rhs in conceptRules:
			KG.addRule(PCFGRule(lhs,rhs,prior))

		#Introduce Pre-Terminal Rules for the primitives
		for word in words:
			KG.addRule(PCFGRule(wordNT,word,prior))

	#Word_None rule
	for word in words:
		KG.addRule(PCFGRule("Word_none",word,prior))

	KG.save(grammarName)
		
if __name__=="__main__":
	#argv[1] == hashToContext (number context)
	#argv[2] == meanings (kicks_ball(purple11))
	#argv[3] == vocabulary
	#argv[4] == grammar_name

	if len(sys.argv) < 5:
		sys.exit("usage: "+sys.argv[0]+" hashToContext meanings vocabulary grammar_name")

	def _readLines(path):
		"""Return the lines of the UTF-8 file `path` without their trailing line breaks."""
		# the with-block makes sure the input file is closed again
		with codecs.open(path,'r',encoding='utf-8') as source:
			return [chomp(line) for line in source.readlines()]

	#Read in contexts: every line is "<number> <context>", separated by a single space
	contextMap = {}
	for number, context in [line.split(" ") for line in _readLines(sys.argv[1])]:
		contextMap[number] = context

	#Read in meanings
	meanings = _readLines(sys.argv[2])

	#Read in words
	#NOTE: this has to stay a module-level name called `words`; createGrammar
	#looks the vocabulary up under that name when called with four arguments
	words = _readLines(sys.argv[3])

	#decompose meanings to get concepts (a dict is used to drop duplicates)
	primitivesDict = {}
	for meaning in meanings:
		prims = meaning.replace("("," ").replace(")"," ").replace(","," ").split()
		for prim in prims:
			primitivesDict[prim] = 1
	primitives = list(primitivesDict)

	createGrammar(contextMap, meanings, primitives, sys.argv[4])


