"""
Functions:
    This program calculates statistical significance via the method discussed in Yeh (2000), More accurate tests for the statistical significance of result differences. In Proc. COLING 2000: http://www.aclweb.org/anthology-new/C/C00/C00-2137.pdf
"""
import random
import sys
import os

def parseResults(rfi, rfb, rfn):
    """
    Parse the corpus and two result files into parallel 0/1 vectors.

    The corpus file `rfi` is read two lines at a time: a line of
    space-separated input tokens followed by a line of gold-standard tokens.
    The result files `rfb` (baseline) and `rfn` (new method) are also read two
    lines at a time: the first line holds the output tokens, the second line
    is skipped. Reading stops at end of file or at the first input line that
    is empty after stripping trailing whitespace.

    Only positions where the input token differs from the gold token are
    recorded. For each such position:
        inVec   gets 0
        goldVec gets 1
        baseVec gets 1 if the baseline token equals the gold token, else 0
        newVec  gets 1 if the new-method token equals the gold token, else 0

    Returns:
        The tuple (inVec, baseVec, newVec, goldVec); all four lists have the
        same length.

    Raises:
        AssertionError: if the gold, baseline or new-method line does not have
            the same number of tokens as the input line.
        Errors from open() propagate unchanged.
    """
    inVec = []
    baseVec = []
    newVec = []
    goldVec = []

    # `with` guarantees all three handles are closed even when a later open()
    # fails or a length check raises part-way through the files.
    with open(rfi) as frfi, open(rfb) as frfb, open(rfn) as frfn:
        while True:
            line = frfi.readline().rstrip()
            if not line:
                break
            inTokens = line.split(' ')
            goldTokens = frfi.readline().rstrip().split(' ')

            baseTokens = frfb.readline().rstrip().split(' ')
            frfb.readline()  # second line of each result pair is not used

            newTokens = frfn.readline().rstrip().split(' ')
            frfn.readline()  # second line of each result pair is not used

            n = len(inTokens)
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type is unchanged.
            if not (len(newTokens) == n and len(goldTokens) == n
                    and len(baseTokens) == n):
                raise AssertionError(
                    "token count mismatch: input={0} gold={1} base={2} new={3}".format(
                        n, len(goldTokens), len(baseTokens), len(newTokens)))

            for inTok, goldTok, baseTok, newTok in zip(
                    inTokens, goldTokens, baseTokens, newTokens):
                if inTok == goldTok:
                    # Positions where the input already matches gold are not scored.
                    continue
                newVec.append(1 if newTok == goldTok else 0)
                baseVec.append(1 if baseTok == goldTok else 0)
                goldVec.append(1)
                inVec.append(0)

    return (inVec, baseVec, newVec, goldVec)

def calScore(inVec, newVec, goldVec, N):
    """
    Return the fraction of the first N positions where newVec agrees with goldVec.

    `inVec` is accepted for signature compatibility but is not read.
    The result is a float; N == 0 raises ZeroDivisionError.
    """
    matches = sum(1 for idx in range(N) if newVec[idx] == goldVec[idx])
    # float() keeps true division on Python 2 as well.
    return float(matches) / N

def randomTest(inVec, baseVec, newVec, goldVec, iniScore, iteration, N):
    """
    Count the random shuffles whose score is strictly greater than iniScore.

    In each of `iteration` rounds a threshold is drawn with
    random.randint(0, N - 1). Then, for every position in order, another draw
    from the same range decides whether that position takes the baseline
    value (draw >= threshold) or keeps the new-method value. The resulting
    vector is scored with calScore against goldVec.

    Returns:
        The number of rounds (as a float) whose score exceeded iniScore.
    """
    exceedCount = 0.0
    for _ in range(iteration):
        threshold = random.randint(0, N - 1)
        # One randint call per position, in index order, after the threshold draw.
        shuffled = [
            baseVec[pos] if random.randint(0, N - 1) >= threshold else newVec[pos]
            for pos in range(N)
        ]
        if calScore(inVec, shuffled, goldVec, N) > iniScore:
            exceedCount += 1
    return exceedCount

def runTest(rfi, rfb, rfn, iteration, baseline, newmethod):
    """
    Run the randomization test on two result files and print one summary line.

    Args:
        rfi: path of the corpus file (input and gold lines).
        rfb: path of the baseline result file.
        rfn: path of the new-method result file.
        iteration: number of random shuffles to perform.
        baseline: label printed for the baseline.
        newmethod: label printed for the new method.

    The printed p-value is (nc + 1) / (iteration + 1), where nc is the count
    returned by randomTest; nc itself is printed as the last field.
    """
    (inVec, baseVec, newVec, goldVec) = parseResults(rfi, rfb, rfn)
    N = len(goldVec)
    iniScore = calScore(inVec, newVec, goldVec, N)
    nc = randomTest(inVec, baseVec, newVec, goldVec, iniScore, iteration, N)
    pValue = (nc + 1.0) / (iteration + 1.0)
    # A single parenthesized argument prints identically as a Python 2 print
    # statement and as a Python 3 print() call.
    print("baseline:{2}\tnew method:{3}\tp-Value: {0:2.4%}\t{1}".format(pValue, nc, baseline, newmethod))

if __name__ == "__main__":
    # Expects exactly two arguments: the baseline name and the new-method
    # name, each substituted into the result-file path below.
    if len(sys.argv) == 3:
        iteration = 1048576 # number of tests
        rfi = "../data/corpus.tweet1c"
        rfb = "../result/tweet1.{0}.test".format(sys.argv[1])
        rfn = "../result/tweet1.{0}.test".format(sys.argv[2])
        runTest(rfi, rfb, rfn, iteration, sys.argv[1], sys.argv[2])
    else:
        # Parenthesized single argument works on both Python 2 and Python 3.
        print("Error in input parameter!")
