import os
import numpy as np
import math
import sys

# Command-line arguments:
#   sys.argv[1] - path prefix of the input/output files (the banner below
#                 suggests 'en' or 'ch')
#   sys.argv[2] - integer interpreted as a calendar year; it only decides
#                 whether the sequence has 365 or 366 days
print('input en or ch')
len_seq = 365

year = int(sys.argv[2])
# Full Gregorian rule: century years are leap years only when divisible by
# 400, so a plain "% 4" test would get 1900 or 2100 wrong.
if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):
    len_seq = 366

bursty_set = set()
# Scale factor applied to the state-transition costs further down.
beta = 1
# One term per line.  The context manager closes the file even if reading
# fails part-way through.
with open(sys.argv[1] + '_burstydic.txt') as dic_file:
    for line in dic_file:
        term = line.strip()
        # Skip blank lines: an empty term would match blank rows in the
        # per-day files and produce a record with no name.
        if term:
            bursty_set.add(term)

# Per-term daily series, indexed by day number 0 .. len_seq-1.  Days on which
# a term does not appear in that day's file stay at 0.
burst_rate = {}
sup_prop = {}

for term in bursty_set:
    burst_rate[term] = np.zeros(len_seq)
    sup_prop[term] = np.zeros(len_seq)

for day in range(len_seq):
    # Each day file holds tab-separated rows: the term comes first, the burst
    # value is the last field and the support value is the third from last.
    # The context manager closes the file even when a row fails to parse.
    with open(sys.argv[1] + '_burst/' + 'day_' + str(day) + '.tf') as day_file:
        for line in day_file:
            attr = line.strip().split('\t')
            term = attr[0]
            if term not in bursty_set:
                continue
            # Negative indices state the intent directly.  A two-field row
            # now raises IndexError instead of silently wrapping around and
            # reusing the last field as the support value.
            burst = float(attr[-1])
            sup_p = float(attr[-3])
            burst_rate[term][day] = burst
            sup_prop[term][day] = sup_p

base_sup = 1.0
base_conf = 1.0

# Rate level associated with the "burst" state (state 1); state 0 sits at 1.
burst_par = 9

# Only conf_level is used by the decoder below; sup_level and avg_level are
# kept as module-level values but are not read in this section.
sup_level = np.array([1, burst_par]) / base_sup
conf_level = np.array([1, burst_par]) / base_conf
avg_level = 2 * sup_level * conf_level / (sup_level + conf_level)

# Cost of moving between states: staying is free, switching costs beta.
# Built once because it does not depend on the term being decoded.
trans_matrix = np.array([[0, 1], [1, 0]]) * beta


def _emission_cost(rate):
    """Return the cost of observing `rate` in state 0 and in state 1.

    A zero rate costs nothing in state 0 and one unit in state 1.  Otherwise
    the cost is the absolute log-distance between the scaled rate and each
    state's level, and a rate at or above `burst_par` is free in state 1.
    """
    if rate == 0:
        return np.array([0.0, 1.0])
    conf = rate / base_conf
    cost = np.abs(math.log(conf) - np.log(conf_level))
    if conf >= burst_par:
        cost[1] = 0
    return cost


def _decode_states(rates):
    """Return the minimum-cost 0/1 state sequence for one term's daily rates.

    Viterbi-style dynamic program: `best_cost[d, s]` is the cheapest total
    cost of any path ending in state `s` on day `d`, and `prev_state[d, s]`
    is the state on day `d - 1` along that path.
    """
    n_days = len(rates)
    best_cost = np.zeros((n_days, 2))
    prev_state = np.zeros((n_days, 2), dtype=int)

    best_cost[0] = _emission_cost(rates[0])
    for day in range(1, n_days):
        # Rows index the current state, columns the previous state.
        step = (_emission_cost(rates[day]).reshape((2, 1))
                + trans_matrix + best_cost[day - 1])
        best_cost[day] = np.min(step, 1)
        prev_state[day] = np.argmin(step, 1)

    # Trace back from the cheapest final state.
    path = np.zeros(n_days, dtype=int)
    path[n_days - 1] = np.argmin(best_cost[n_days - 1])
    for day in range(n_days - 2, -1, -1):
        path[day] = prev_state[day + 1, path[day + 1]]
    return path


# One output line per term: the term followed by its space-separated daily
# states.  The context manager closes the file even if decoding fails.
with open(sys.argv[1] + '_kleinburg_burst.txt', 'w') as out:
    print(len(bursty_set))
    for index, e in enumerate(bursty_set):
        states = _decode_states(burst_rate[e])
        outstr = e + ' ' + ' '.join(str(state) for state in states)
        out.write(outstr.strip() + '\n')
        # Progress indicator every 1000 terms.
        if index % 1000 == 0:
            print(index)
