#suggested prompts to test Match-Extend out:
#serialize({"#a","ab","bc","cd","de","e%"},{"ea"}) #full reduplication
#serialize({"#a","ab","bc","cd","de","e%"},{"ec"}) #partial reduplication
#serialize({"#a","ab","bc","cd","de","e%"},{"ea","#c"}) #leftward 'wrong end' reduplication
#serialize({"#a","ab","bc","cd","de","e%"},{"ea","c%"}) #rightward 'wrong end' reduplication
#serialize({"#a","ab","bc","cd","de","e%"},{"#x","xy","yz","za"}) #prefix
#serialize({"#a","ab","bc","cd","de","e%","#j","jk","kl","la"},{"#x","xy","yz","zj"}) #prefix on an already-prefixed form
#serialize({"#a","ab","bc","cd","de","e%"},{"ea","ec"}) #double reduplication with a 'shorter' back loop
#serialize({"#a","ab","bc","cd","de","e%"},{"ea","ax","xc"}) #'early' NESF
#serialize({"#a","ab","bc","cd","de","e%"},{"ea","cx","xe"}) #'late' NESF
#serialize({"#k","ka","at","t%"},{"tz","z%"}) #English example
#serialize({"#h","hN","Nu","u?","?%"},{"hm","mN"}) #Atayal example
#serialize({"#c","ci","ip","pk","ka","an", "n%"},{"cp","ic"}) #Tohono O'odham example
#serialize({"#s","su","ut","t%"},{"#i","it","ts"}) #Nancowry example
#serialize({"#g","ga","ad","d%"},{"dg","da"}) #Lushootseed example
#serialize({"#s","si","il","l%"},{"ls","is"}) #Nlaka’pamuctsin example
#serialize({"#s","sl","lO","Og", "g%"},{"sg","gl"}) #Temiar example




def match(set_of_strings):
    """Find pairs of workspace strings that can be fused at an overlapping edge.

    Every ordered pair of unequal strings is examined.  A pair qualifies when
    the tail of the first string equals the head of the second.  Two-character
    overlaps are tried first; one-character overlaps are only considered when
    no two-character overlap exists anywhere in the collection.

    Parameters:
        set_of_strings: iterable collection of strings (iterated repeatedly).
            Strings are indexed directly, so each must have at least two
            characters or an IndexError is raised.

    Returns:
        A list of triplets ``(first, second, fused)`` such as
        ``('abc', 'bcd', 'abcd')``, where ``fused`` is meant to replace
        ``first`` and ``second``.  The list is empty when nothing overlaps.
    """
    # Ordered pairs of distinct (by value) strings, in iteration order.
    ordered_pairs = []
    for left in set_of_strings:
        for right in set_of_strings:
            if left != right:
                ordered_pairs.append((left, right))

    # Preferred case: the last two characters of `left` are the first two of `right`.
    two_char_merges = [
        (left, right, left + right[2:])
        for left, right in ordered_pairs
        if left[-2] + left[-1] == right[0] + right[1]
    ]
    if two_char_merges:
        return two_char_merges

    # Fallback: a single shared character at the junction.
    return [
        (left, right, left + right[1:])
        for left, right in ordered_pairs
        if left[-1] == right[0]
    ]

def extend(StemSet, Workspace):
    """Grow every workspace string by one character on each side, where possible.

    For each string in ``Workspace`` the stem pairs are scanned once:
      * a pair whose last character equals the string's first character
        supplies its first character as a left extension;
      * a pair whose first character equals the string's last character
        supplies its second character as a right extension.
    If several pairs qualify on the same side, the last one visited wins.
    A side with no qualifying pair is left untouched.

    Parameters:
        StemSet: iterable of two-character strings (the stem's segment pairs).
        Workspace: iterable of non-empty strings to extend.

    Returns:
        A new list with one (possibly extended) string per workspace item, in
        iteration order.  It equals the input contents when no string could
        be extended on either side.
    """
    grown = []

    for chunk in Workspace:
        # Start from "no extension" for every chunk so that nothing carries
        # over from the previously processed chunk.
        prefix = ""
        suffix = ""

        for edge in StemSet:
            if edge[-1] == chunk[0]:
                prefix = edge[0]
            if chunk[-1] == edge[0]:
                suffix = edge[1]

        grown.append(prefix + chunk + suffix)

    return grown



def serialize(StemSet, Workspace):
    """Linearize a stem plus added material by repeated Match and Extend steps.

    The workspace is processed until it stops changing:
      1. Match: ask ``match`` for fusable pairs; if any exist, replace the two
         strings of the first candidate with their fused form and start over.
      2. Extend: only when Match found nothing, ask ``extend`` to grow every
         workspace string using the stem pairs.  If that changes the
         workspace, start over; otherwise the process is finished.

    Every segment must be a unique character.

    Parameters:
        StemSet: collection of two-character strings describing the stem,
            e.g. ``{"#k", "ka", "at", "t%"}``.
        Workspace: collection (set, list, ...) of strings to serialize,
            e.g. ``{"tz", "z%"}``.  It is copied, never mutated.

    Returns:
        The single resulting string when the workspace collapses to one item
        (``"#katz%"`` for the example above); otherwise a list of the
        remaining strings.

    Notes:
        * The workspace is copied into a list up front, so a set argument
          that is already fully serialized can be returned by index instead
          of raising ``TypeError`` on ``set[0]``.
        * The steps run in a loop rather than via recursion, so long stems
          are not limited by the interpreter's recursion depth.
        * This function prints nothing; there is no ``Steps`` parameter.
    """
    # Work on a private list: sets are not subscriptable and the caller's
    # collection must not be modified by the remove/append calls below.
    workspace = list(Workspace)

    while True:
        candidates = match(workspace)  # empty list when nothing can be fused
        if candidates:
            # Replace the two matching strings by the string combining them,
            # e.g. 'abcde' and 'defgh' become 'abcdefgh'.
            first, second, fused = candidates[0]
            workspace.remove(first)
            workspace.remove(second)
            workspace.append(fused)
            continue

        # Extend only takes place if Match failed.
        extended = extend(StemSet, workspace)
        if set(extended) == set(workspace):
            # Nothing left to fuse or to grow: this is the final state.
            if len(workspace) == 1:
                return workspace[0]
            return workspace

        workspace = extended

def turn_into_a_set_of_character_pairs(string):
    """Return the set of all adjacent two-character substrings of ``string``.

    This helper is not part of Match-Extend itself; it only makes it easier
    to build an input for ``serialize``.  For example, ``"#abcdefg%"`` yields
    ``{'de', 'g%', 'ab', 'cd', '#a', 'fg', 'bc', 'ef'}``.  Strings shorter
    than two characters yield an empty set.
    """
    # Pair each character with its right-hand neighbour.
    neighbours = zip(string, string[1:])
    return {current + following for current, following in neighbours}

    
