#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import io
import os
import sqlite3
from argparse import ArgumentParser

from tqdm import tqdm


def insert(a, b, cur):
    """Insert one (ask, answer) pair into the ``conversation`` table.

    The values are bound as SQL parameters instead of being spliced into the
    statement text, so quotes or any other characters in the input cannot
    break or alter the statement, and no manual escaping is required.

    Args:
        a: Text stored in the ``ask`` column (a unicode string).
        b: Text stored in the ``answer`` column (a unicode string).
        cur: Cursor the statement is executed on. Nothing is committed here;
            committing is left to the caller.

    Returns:
        Always 1, so the caller can add it to a running count of rows.
    """
    cur.execute(
        "INSERT INTO conversation (ask, answer) VALUES (?, ?)",
        (a, b),
    )
    return 1


def main(file_path, en_len, de_len, type):
    """Rebuild one bucket database from a ``#TAB#``-separated text file.

    Every line of ``file_path`` must have the form ``ask#TAB#answer``. Any
    existing database file for the same bucket is deleted and recreated with
    a single ``conversation (ask, answer)`` table, then filled with one row
    per input line. The number of inserted rows is printed at the end.

    Args:
        file_path: Path of the UTF-8 encoded input file.
        en_len: Value substituted into the first slot of the database file
            name ``bucket_{}_{}.db``.
        de_len: Value substituted into the second slot of the file name.
        type: ``'test'`` places the database under ``bucket_dbs/test/``; any
            other value places it under ``bucket_dbs/``. (The name shadows
            the builtin but is kept so keyword callers keep working.)

    Raises:
        ValueError: If a line does not contain exactly one ``#TAB#``
            separator.
    """
    if type == 'test':
        template = 'bucket_dbs/test/bucket_{}_{}.db'
    else:
        template = 'bucket_dbs/bucket_{}_{}.db'
    db = template.format(en_len, de_len)

    # Count lines before touching the old database: the total feeds the
    # progress bar, and an unreadable input file then fails before anything
    # is deleted. Binary mode splits on '\n' only, matching the read below.
    with io.open(file_path, 'rb') as f:
        num_lines = sum(1 for _ in f)

    # sqlite3.connect() cannot create missing parent directories.
    db_dir = os.path.dirname(db)
    if not os.path.isdir(db_dir):
        os.makedirs(db_dir)
    if os.path.exists(db):
        os.remove(db)

    batch_size = 100000  # rows inserted between intermediate commits
    inserted = 0
    conn = sqlite3.connect(db)
    try:
        cur = conn.cursor()
        cur.execute("""
        CREATE TABLE IF NOT EXISTS conversation
        (ask text, answer text);
        """)
        conn.commit()

        # io.open yields unicode lines on both Python 2 and 3, so no manual
        # decode is needed; newline='\n' keeps '\n' as the only line
        # terminator (a stray '\r' is removed by strip()).
        with io.open(file_path, encoding='utf8', newline='\n') as f:
            for line_no, line in enumerate(tqdm(f, total=num_lines), 1):
                fields = line.strip().split("#TAB#")
                if len(fields) != 2:
                    raise ValueError(
                        '{}:{}: expected exactly one "#TAB#" separator, '
                        'found {}'.format(file_path, line_no, len(fields) - 1)
                    )
                ask, ans = fields
                inserted += insert(ask, ans, cur)
                # Commit periodically so one transaction does not grow
                # without bound on large inputs.
                if inserted % batch_size == 0:
                    conn.commit()

        conn.commit()
    finally:
        # Always release the database handle, including on errors.
        conn.close()

    print('Total inserted: %d' % inserted)

if __name__ == '__main__':
    # Command-line entry point: read the four options, echo the parsed
    # namespace, and hand the values to main().
    cli = ArgumentParser(usage='usage tip', description='no')

    # (flag, default, value type, help text) for each supported option.
    option_specs = (
        ('--filepath', './data/train.reddit.txt', str, 'train dir'),
        ('--enlen', 100, int, 'query sentence max length'),
        ('--delen', 60, int, 'response sentence max length'),
        ('--type', 'train', str, "train file or test file"),
    )
    for flag, default, kind, text in option_specs:
        cli.add_argument(flag, default=default, type=kind, help=text)

    args = cli.parse_args()
    print(args)
    main(args.filepath, args.enlen, args.delen, args.type)
