import argparse
import pandas as pd
import numpy as np
import os

# Module-level default for a training-file path. It is an empty string and is
# not referenced anywhere in the visible code.
default_train_file = ''


def merge_cluster(input_file1, input_file2, output_file):
    """Combine two tab-separated tables side by side and save the result.

    Both inputs are loaded as tab-separated tables. Rows are paired by their
    index using an inner join, so only index values present in both tables
    survive. The combined table is written to ``output_file`` as
    tab-separated text without an index column.

    Args:
        input_file1: Path to the raw training data.
        input_file2: Path to the predicted clusters.
        output_file: Path the merged table is written to.
    """
    raw_frame, cluster_frame = (
        pd.read_csv(path, sep='\t') for path in (input_file1, input_file2)
    )
    merged = pd.merge(
        raw_frame,
        cluster_frame,
        how='inner',
        left_index=True,
        right_index=True,
    )
    merged.to_csv(output_file, sep='\t', index=False)


def ensure_parent_dir(path):
    """Create the directory that will contain ``path`` if it is missing.

    A path with no directory component (for example ``out.tsv`` or an empty
    string) has no parent to create, so nothing is done. Previously this case
    reached ``os.makedirs('')``, which raises ``FileNotFoundError``.

    Args:
        path: File path whose parent directory should exist.
    """
    parent = os.path.dirname(path)
    if parent:
        # exist_ok makes the call safe when the directory already exists.
        os.makedirs(parent, exist_ok=True)


if __name__ == "__main__":
    # Command-line entry point: parse the file paths and the requested
    # action, make sure the output directory exists, then run the action.
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--input_file1', default='', type=str)
    parser.add_argument('--input_file2', default='', type=str)
    parser.add_argument('--output_file', default='', type=str)
    parser.add_argument('--purpose', default='merge_cluster', type=str)

    args = parser.parse_args()
    ensure_parent_dir(args.output_file)

    if args.purpose == 'merge_cluster':
        # Merge the observed data with the predicted clusters.
        #
        # Example:
        #   python evaluate.py --input_file1 /home/jxchen/efs/1Projects/E2E/template-cluster/data/cp-clust-clean \
        #     --input_file2 ./results/glove_lstm_20190927_len_10_enc_10/models/cluster_prediction_140.tsv \
        #     --output_file ./results/glove_lstm_20190927_len_10_enc_10/output/cluster_prediction_140_merge.tsv
        merge_cluster(args.input_file1, args.input_file2, args.output_file)



