import itertools
import os
import sys
import time as TM

# Number of days that precede the first day of each month (index 0 = January).
# "ping" is the 365-day table and "run" the 366-day one (February has 29
# days) -- presumably pinyin for common year / leap year.
month_day_ping = [
    0, 31, 59, 90, 120, 151,
    181, 212, 243, 273, 304, 334,
]
month_day_run = [
    0, 31, 60, 91, 121, 152,
    182, 213, 244, 274, 305, 335,
]

# True when the numeric second command-line argument is divisible by four.
# Only the divisible-by-four test is applied; century years are not
# special-cased here.
flag = int(sys.argv[2]) % 4 == 0

def startday(date):
    """Return the zero-based day-of-year index of a ``YYYYMMDD`` string.

    January 1st maps to 0 and December 31st to 364 (or 365 in a leap year).
    Whether the year is a leap year is decided from the year embedded in
    ``date`` itself, using the full Gregorian rule, so the result does not
    depend on how the script was invoked.

    Args:
        date: String whose first eight characters are ``YYYYMMDD``.

    Returns:
        Integer count of days elapsed since January 1st of that year.

    Raises:
        ValueError: If any of the three fields is not an integer.
        IndexError: If the month field is greater than 12.

    The day and month values are not otherwise range-checked.
    """
    year = int(date[0:4])
    month = int(date[4:6])
    day = int(date[6:8])

    # Days preceding the first of each month, for 365- and 366-day years.
    common_offsets = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334)
    leap_offsets = (0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335)

    # Gregorian rule: every fourth year, except centuries not divisible by 400.
    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    offsets = leap_offsets if is_leap else common_offsets
    return offsets[month - 1] + day - 1

# time_burst[t][token] -> integer feature id, loaded from
# "<argv[1]>_time_burst_feature.txt".  Each record holds three tab-separated
# columns: an integer key t, a token string, and an integer feature id.
# The main loop looks t up using the value returned by startday().
time_burst = {}
with open(sys.argv[1]+'_time_burst_feature.txt') as burst_file:
    # The context manager closes the file even if a record fails to parse.
    for record in burst_file:
        record = record.strip()
        if not record:
            # Tolerate blank lines instead of failing on int('').
            continue
        fields = record.split('\t')
        slot = int(fields[0])
        time_burst.setdefault(slot, {})[fields[1]] = int(fields[2])

# Maps an (a, b) pair of feature ids to an integer counter.
edge_table = dict()

# Tag searched for inside each data file name; the date is read from the
# characters that follow it.  'ch' selects 'CMN_', anything else 'ENG_'.
match_str = 'CMN_' if sys.argv[1] == 'ch' else 'ENG_'

# Scan "<argv[1]>_stream/<dir>/<file>" for every <dir> whose name starts with
# argv[2].  For each file, collect the set of feature ids whose tokens occur
# in it (per the time_burst entry for the file's date), then bump the counter
# of every unordered pair of distinct ids in that set by one.
start = TM.clock()
data_path = sys.argv[1]+'_stream/'
print('Loading data set...')
for datedir in os.listdir(data_path):
    if not datedir.startswith(sys.argv[2]):
        continue
    print(datedir)
    dir_path = data_path+datedir+'/'
    for fname in os.listdir(dir_path):
        # The eight characters right after the language tag are read as
        # YYYYMMDD.  NOTE: names lacking the tag are not filtered out; for
        # those find() returns -1 and the slice starts at a meaningless
        # offset.
        date_pos = fname.find(match_str)+len(match_str)
        date = startday(fname[date_pos:date_pos+8])
        if date not in time_burst:
            continue
        day_features = time_burst[date]

        feature_set = set()
        # The context manager closes the file even if reading fails.
        with open(dir_path+fname) as doc:
            for text in doc:
                for token in text.split():
                    if '#' in token:
                        if len(token) == 1:
                            # A bare '#' carries no token text.
                            continue
                        # Keep only what precedes the last '#'.
                        token = token[0:token.rfind('#')]
                    if token in day_features:
                        feature_set.add(day_features[token])

        # Sorting first makes every generated pair satisfy a < b, so each
        # unordered pair always maps to the same dictionary key.
        for pair in itertools.combinations(sorted(feature_set), 2):
            edge_table[pair] = edge_table.get(pair, 0) + 1

# Write every edge whose counter reaches the threshold to
# "<argv[1]>_graph_info.txt", one per line as "<str(pair)>\t<count>".
# The threshold is 4, multiplied by 5 when argv[1] is 'en'.
edge_threshold = 4
if sys.argv[1]=='en':
    edge_threshold *= 5
# The context manager flushes and closes the file even if a write fails.
with open(sys.argv[1]+'_graph_info.txt','w') as out:
    for pair, count in edge_table.items():
        if count >= edge_threshold:
            out.write(str(pair)+'\t'+str(count)+'\n')
end = TM.clock()

# Time elapsed between the two TM.clock() readings (loading through writing).
print(end-start)
