import codecs
import os
import random

# Build shuffled word samples from every text file in `in_directory`.
#
# For each input file the lines are read, shuffled, and their
# whitespace-separated words are written to "<name>.samples.txt" in
# `out_directory`, one sample per output line.  Output files are opened in
# append mode, so running the script again adds more samples to them.

in_directory = os.path.join(".", "!Dialect_TW")
out_directory = os.path.join(".")

# A sample line is closed once its word count exceeds this threshold, so
# every complete sample holds WORDS_PER_SAMPLE + 1 words.
# NOTE(review): confirm whether exactly 1000 words per sample was intended;
# the original `> 1000` comparison is preserved here on purpose.
WORDS_PER_SAMPLE = 1000


def _read_lines(path):
    """Return the stripped lines of the file at *path*, decoded as UTF-8.

    Undecodable bytes are replaced rather than raising, so one corrupt
    input file does not stop the run.
    """
    with open(path, "rb") as fo:
        return [raw.decode("utf-8", errors="replace").strip() for raw in fo]


def _sample_name(filename):
    """Return the output file name for *filename*.

    Only a trailing ".txt" is removed; a ".txt" occurring elsewhere in the
    name is left untouched.
    """
    suffix = ".txt"
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    return stem + ".samples.txt"


for filename in os.listdir(in_directory):

    in_path = os.path.join(in_directory, filename)

    # os.listdir() also yields sub-directories, which cannot be opened as
    # files; skip anything that is not a regular file.
    if not os.path.isfile(in_path):
        continue

    print("\tStarting " + str(filename))

    line_list = _read_lines(in_path)
    random.shuffle(line_list)

    out_path = os.path.join(out_directory, _sample_name(filename))

    with codecs.open(out_path, "a", encoding="utf-8") as fw:

        counter = 0        # words written to the current sample
        line_counter = 0   # input lines touched by the current sample

        for line in line_list:
            line_counter += 1
            for word in line.split():

                fw.write(word)
                counter += 1

                if counter > WORDS_PER_SAMPLE:
                    # Sample is full: end the line and report how many
                    # input lines contributed to it.
                    fw.write("\n")
                    counter = 0
                    print(line_counter)
                    line_counter = 0

                else:
                    fw.write(" ")

        # Terminate a trailing partial sample.  The file is opened in append
        # mode, so without this the next run would continue on the same line.
        if counter:
            fw.write("\n")