import numpy as np
from transformers import pipeline, set_seed

# Load the dataset archive once and pull all three splits from it, instead of
# re-opening and re-unpickling the same file for every split. The `with`
# block closes the underlying file handle as soon as the data is extracted.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects; only
# point this at archives from a trusted source.
with np.load('data-augmentation-for-sts/data/low_data.npz', allow_pickle=True) as archive:
    # 'store' is indexed with [()] to unwrap the single pickled object it
    # holds; that object is then read by split name below.
    splits = archive['store'][()]

train_data = splits['train']
dev_data = splits['dev']
test_data = splits['test']
print("Example of data:", train_data[0])

# Sampling is stochastic; seed it so the augmented dataset is reproducible
# from run to run (set_seed was imported but never called).
set_seed(42)

generator = pipeline('text-generation', model='facebook/opt-125m')

# Single-sentence augmentation: for every training example, use the item at
# index 0 as the prompt, let the model continue it, and keep the items at
# indices 1 and 2 unchanged.
syn_data = []

# Number of synthetic variants to generate per training example.
num = 1

for example in train_data:
    txt = example[0]
    for _ in range(num):
        # max_new_tokens bounds only the continuation. The previous
        # max_length=20 also counted the prompt tokens, so any prompt of
        # roughly 20 tokens or more left no room for generated text.
        new_txt = generator(txt, max_new_tokens=20, do_sample=True)
        # 'generated_text' is stored as the new first item of the example.
        syn_data.append([new_txt[0]['generated_text'], example[1], example[2]])

# list() keeps this a concatenation even if train_data was loaded as a NumPy
# array, where `+` would mean element-wise addition rather than appending.
syn_data = syn_data + list(train_data)
print("size of train data:", len(train_data))
print("size of augmented data:", len(syn_data))

# Bundle the augmented training split together with the untouched dev and
# test splits, and write them out under the single archive key 'store'.
store = {
    'train': syn_data,
    'dev': dev_data,
    'test': test_data,
}

# NOTE(review): this path has no 'data/' component, unlike the input archive
# read at the top of the script — confirm that location is intended.
output_path = 'data-augmentation-for-sts/low_data_opt.npz'
np.savez(output_path, store=store)