import os
import re

# Every location below is resolved against the directory the script is
# launched from, not the directory the script file lives in.
PATH_BASE = os.getcwd()

# Input: text file read line by line, each line naming one sample file.
DATA_LIST = os.path.join(PATH_BASE, "english_duta.txt")

# Output: temp.txt inside the "wordlists" directory receives one word per line.
PATH_WORDS = os.path.join(PATH_BASE, "wordlists")
TEMP_TEXT = os.path.join(PATH_WORDS, "temp.txt")

# Encodings tried, in this order, when reading each sample file.
encodings = [
    'utf-8',
    'windows-1250',
]

# Collect the sample paths: DATA_LIST holds one path per line.
# NOTE(review): the list file and the output file are still opened with the
# platform default encoding, exactly as before; confirm whether an explicit
# encoding (e.g. utf-8) is wanted for either of them.
with open(DATA_LIST, 'r') as f:
    # Strip trailing whitespace/newlines and drop blank lines; an empty
    # string would otherwise be handed to open() and raise FileNotFoundError.
    files = [name for name in (line.rstrip() for line in f) if name]

# The context manager guarantees the output is flushed and closed even if
# reading one of the samples raises.
with open(TEMP_TEXT, 'w') as ftemp:
    for sample in files:
        # Reset for every sample. Without this, a file that fails all
        # encodings would either raise NameError (first sample) or silently
        # re-emit the previous sample's text (any later sample).
        content = None

        # Try each candidate encoding in order and keep the first that
        # decodes the whole file without error.
        for e in encodings:
            try:
                with open(sample, 'r', encoding=e) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            break

        # No encoding could decode this sample: skip it.
        if content is None:
            continue

        # Emit every whitespace-separated token on its own line.
        ftemp.writelines(f'{word}\n' for word in content.split())