from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import os

# Build a word-frequency table for a single word-dump file and write it,
# sorted by descending count, to a CSV file under ``commonwords_path``.

# CountVectorizer(input='filename') expects a sequence of file paths and reads
# each file itself, so the single dump file is wrapped in a list.
dump_path = ["wordlists/temp_worddump.txt"]

# strip_accents='ascii' folds accented characters to their ASCII equivalents
# before tokenizing.
vectorizer = CountVectorizer(input='filename', strip_accents='ascii')
count_vectors = vectorizer.fit_transform(dump_path)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer the replacement and fall back only on older installations.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_arr = vectorizer.get_feature_names_out()
else:
    feature_arr = vectorizer.get_feature_names()

# One input path must yield exactly one document row in the count matrix.
doc_count = count_vectors.shape[0]
assert doc_count == 1, f'expected exactly 1 document, got {doc_count}'

commonwords_path = 'lex_stats'
# to_csv() does not create missing parent directories.
os.makedirs(commonwords_path, exist_ok=True)

# The output path does not depend on the document index, so build it once.
csv_file_name = os.path.join(commonwords_path, 'most_common_validinvalid.csv')

for i in range(doc_count):

    # Despite the name, this row holds raw term counts (CountVectorizer),
    # not TF-IDF weights.
    tfidf_vec = count_vectors[i]

    # Transpose the 1 x n_features sparse row into an n_features x 1 dense
    # column so each vocabulary term becomes one row of the frame.
    df = pd.DataFrame(tfidf_vec.T.toarray(), index=feature_arr, columns=["count"])
    df = df.sort_values(by=["count"], ascending=False)
    df.to_csv(csv_file_name)

    print(f'{csv_file_name} created.')
