"""
Functions:
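    This program measures the degree of context noisiness: for every token
    whose original form differs from its normal form, it counts how many
    tokens in the surrounding window are already in normal form, and prints
    the distribution of those counts.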
"""
import pickle

# Load the pickled dictionary. The file is opened in binary mode (pickle
# streams are bytes) inside a ``with`` block so the handle is closed as soon
# as loading finishes instead of being left open for the rest of the run.
# NOTE(review): dictSet is not referenced by the rest of this script as
# visible here; the load is kept so that start-up behaviour (including the
# failure when the file is missing) is unchanged -- confirm whether it is
# still needed.
# NOTE(review): unpickling can execute arbitrary code; only load a trusted
# file here.
with open("../data/dict.pickle", "rb") as dictFile:
    dictSet = pickle.load(dictFile)

# Number of neighbouring tokens inspected on each side of a token.
windowSize = 3
# Running count of tokens whose original form differs from the normal form.
normNum = 0
# Histogram: key = number of unchanged tokens found inside the window,
# value = how many differing tokens were seen with that count. The centre
# token is itself flagged 0, so keys range over 0 .. 2 * windowSize.
resultDict = dict((count, 0) for count in range(2 * windowSize + 1))
print(resultDict)

# Scan the corpus. Each record is a line holding the token count, followed
# by that many "<original>\t<normal>" lines. The ``with`` block guarantees
# the file is closed even when a malformed record raises part-way through.
# ``readline`` is used throughout (rather than iterating the file object)
# because the header and the token lines are consumed in different loops.
with open('../data/corpus.tweet1') as f:
    while True:
        header = f.readline()
        if not header:
            break  # end of file
        num = int(header)

        # flags[j] is 1 when token j is already in normal form, else 0.
        flags = []
        for _ in range(num):
            line = f.readline().rstrip()
            segs = line.split('\t')
            if len(segs) < 2:
                # Truncated record or missing tab: fail with a message that
                # names the offending line rather than a bare IndexError.
                raise ValueError(
                    "malformed token line (expected '<ill>\\t<norm>'): %r"
                    % line)
            ill, norm = segs[0], segs[1]
            if ill == norm:
                flags.append(1)
            else:
                flags.append(0)
                normNum += 1

        # analyze: for every differing token, count the unchanged tokens in
        # the surrounding window. The token's own flag is 0, so it never
        # counts itself. Slicing clamps the upper bound to the list length,
        # so only the lower bound needs an explicit max().
        for i, flag in enumerate(flags):
            if flag == 0:
                window = flags[max(0, i - windowSize): i + windowSize + 1]
                resultDict[sum(window)] += 1
# Report: total number of differing tokens, the raw histogram, then each
# histogram bucket as a percentage of that total.
print(normNum)
print(resultDict)
for k, v in sorted(resultDict.items()):
    # With no differing tokens every bucket is empty; report 0% instead of
    # dividing by zero.
    share = float(v) / normNum if normNum else 0.0
    # Single-argument print keeps the "<key> <percent>" output identical
    # under both the print statement and the print function.
    print("{0} {1:2.2%}".format(k, share))
