#!/bin/bash

CONCATPATH="./categories_onetext"
CATPATH="./categories"

[ ! -d $CONCATPATH ] && mkdir categories_onetext

for CATEGORY in $(ls $CATPATH)
do
    FILENAME="$CONCATPATH/${CATEGORY}_en_all.txt"

    touch $FILENAME
    truncate -s 0 $FILENAME

    for FILE in $(ls "$CATPATH/$CATEGORY")
    do
        cat "$CATPATH/$CATEGORY/$FILE" >> $FILENAME
    done

    # Some preprocessing methods
    dos2unix $FILENAME
    sed -i '/^[[:space:]]*$/d' $FILENAME
    perl -pi -e 's/[^[:ascii:]]//g' $FILENAME

done

python3 preprocess_onetext.py
