import numpy as np
import nlpaug.augmenter.char.random as nac
import random

# Load the dataset archive once and pull out its three splits.
# The archive's 'store' entry is indexed with `[()]` to unwrap the stored
# object before the 'train' / 'dev' / 'test' keys are read from it.
# NOTE: allow_pickle=True unpickles arbitrary Python objects, so this must
# only ever be pointed at a trusted file.
DATA_PATH = 'data-augmentation-for-sts/data/low_data.npz'

# The context manager closes the underlying file handle; the original code
# opened (and unpickled) the same archive three times without closing it.
with np.load(DATA_PATH, allow_pickle=True) as archive:
    splits = archive['store'][()]

train_data = splits['train']
dev_data = splits['dev']
test_data = splits['test']
print("Example of data:", train_data[0])

# Build a character-level augmented copy of the training split and prepend it
# to the original training samples.
syn_data = []
RECOMMENDED_CHARACTER_METHODS = ["insert", "substitute", "swap", "delete"]

# How many augmented copies of the training split to generate.
# The original author noted 9 (low-resource) or 2 (half) as alternative values.
NUM_AUGMENT_ROUNDS = 1

# Only one augmenter configuration exists per method, so build each augmenter
# once up front instead of constructing a new one for every sample.
augmenters = {
    method: nac.RandomCharAug(action=method, aug_char_p=0.1,
                              include_upper_case=False,
                              include_numeric=False, spec_char=' ')
    for method in RECOMMENDED_CHARACTER_METHODS
}

for _ in range(NUM_AUGMENT_ROUNDS):
    for sample in train_data:
        # Pick one of the methods uniformly at random; random.choice keeps
        # this correct if the method list ever changes length.
        method = random.choice(RECOMMENDED_CHARACTER_METHODS)
        # NOTE(review): `[0]` assumes augment() returns a list (recent nlpaug
        # releases). If the installed version returns a plain str, this would
        # keep only the first character -- confirm against the pinned version.
        new_txt = augmenters[method].augment(sample[0])[0]
        # Only field 0 is perturbed; fields 1 and 2 are copied unchanged
        # (presumably the paired sentence and its label -- verify).
        syn_data.append([new_txt, sample[1], sample[2]])

# list(...) makes this a plain concatenation even when train_data is not a
# list (e.g. a NumPy array, where `+` would not concatenate).
syn_data = syn_data + list(train_data)
print("size of train data:", len(train_data))
print("size of augmented data:", len(syn_data))


# Bundle the augmented training split with the untouched dev/test splits
# and write them out as a single 'store' entry in a new .npz archive.
store = {
    'train': syn_data,
    'dev': dev_data,
    'test': test_data,
}

np.savez('data-augmentation-for-sts/low_data_char.npz', store=store)