#!/usr/bin/env python
import pandas as pd
import argparse
# from indictrans import Transliterator
import re
import preprocessor as p

def is_english(s: str) -> bool:
    """Report whether *s* stays within the allowed count of Devanagari characters.

    Counts the characters of *s* that fall in the Unicode range
    U+0900-U+097F (the Devanagari block) and returns ``True`` when that
    count is at most 1000, ``False`` otherwise.

    NOTE(review): with a limit of 1000, any string holding up to 1000 such
    characters is reported as English -- confirm the threshold is intended.
    """
    devanagari_count = sum(1 for _ in re.finditer(u'[\u0900-\u097F]', s))
    return devanagari_count <= 1000

def clean_sentences(df, clean_fn):
    """Replace every value of ``df['Sentence']`` with ``clean_fn(value)``.

    The column is reassigned as a whole rather than written cell by cell
    through ``df['Sentence'].iloc[i] = ...``. That chained assignment may
    write to a temporary object (pandas emits SettingWithCopyWarning) and
    leaves ``df`` untouched when pandas Copy-on-Write is active.

    Args:
        df: DataFrame that has a 'Sentence' column; modified in place.
        clean_fn: Callable applied to each cell of the column.

    Returns:
        The same ``df`` object that was passed in.
    """
    df['Sentence'] = df['Sentence'].map(clean_fn)
    return df


def main():
    """Clean the 'Sentence' column of the CSV given by ``-i`` and save it.

    Reads the CSV, prints the 'Sentence' column, runs ``p.clean`` on every
    sentence, prints the column again and writes the whole frame to
    'clean_lat_kan_transliterate_dev.csv' in the current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", type=str, required=True, help="Dataset csv file")
    args = parser.parse_args()

    df = pd.read_csv(args.i)

    print(df['Sentence'])
    clean_sentences(df, p.clean)
    print(df['Sentence'])

    df.to_csv('clean_lat_kan_transliterate_dev.csv')


# Everything that needs the parsed arguments lives behind the guard, so
# importing this module no longer raises NameError on `args` or touches files.
if __name__ == "__main__":
    main()
