#!/bin/bash
#
# Builds one cleaned-up text file per category directory.
#
# For every directory <category> directly under ./surfaceweb_data, all regular
# files inside it are concatenated into ./surfaceweb_onetext/<category>_all.txt.
# The combined file is then passed through dos2unix, stripped of lines that are
# empty or whitespace-only, and stripped of non-ASCII characters.
#
# Both paths are relative to the current working directory.
# Requires: dos2unix, sed with in-place (-i) support, perl.
# Exit status: 0 on success, non-zero if the input directory is missing or any
# step failed for at least one category.

readonly CONCATPATH="./surfaceweb_onetext"
readonly CATPATH="./surfaceweb_data"

#######################################
# Concatenate every regular file of a directory into one output file.
# Any previous content of the output file is discarded.
# Arguments: $1 - source directory
#            $2 - output file
# Returns:   0 on success, non-zero if the output could not be written or a
#            source file could not be read
#######################################
concat_category() {
    local src_dir=$1 out_file=$2
    local src_file status=0

    # Globbing instead of parsing `ls` keeps names containing spaces or glob
    # characters intact. The single redirection on the loop truncates the
    # output once and then collects everything cat writes.
    for src_file in "$src_dir"/*; do
        # Skips sub-directories and the literal pattern left behind when the
        # directory is empty.
        [[ -f "$src_file" ]] || continue
        cat "$src_file" || status=1
    done > "$out_file" || return 1

    return "$status"
}

#######################################
# Clean a text file in place.
# Arguments: $1 - file to clean
# Returns:   0 if every tool succeeded, 1 otherwise
#######################################
clean_text() {
    local text_file=$1
    local status=0

    # Every step is attempted even if an earlier one failed, so a missing tool
    # does not prevent the remaining clean-up.
    dos2unix "$text_file" || status=1
    # Drop lines that are empty or contain only whitespace.
    sed -i '/^[[:space:]]*$/d' "$text_file" || status=1
    # Remove everything outside the ASCII range.
    # NOTE: this runs after the blank-line removal, so a line that consisted
    # solely of non-ASCII characters is left behind as an empty line.
    perl -pi -e 's/[^[:ascii:]]//g' "$text_file" || status=1

    return "$status"
}

#######################################
# Create the output directory and process every category directory.
# Globals:   CONCATPATH (read), CATPATH (read)
# Returns:   0 on success, 1 if the output directory cannot be created, the
#            input directory is missing, or any category failed
#######################################
main() {
    local category_dir category target
    local status=0

    # -p is a no-op when the directory already exists.
    mkdir -p "$CONCATPATH" || return 1

    if [[ ! -d "$CATPATH" ]]; then
        printf 'error: input directory not found: %s\n' "$CATPATH" >&2
        return 1
    fi

    for category_dir in "$CATPATH"/*; do
        # Only directories are categories; loose files are ignored.
        [[ -d "$category_dir" ]] || continue

        category=${category_dir##*/}
        target="$CONCATPATH/${category}_all.txt"

        if ! concat_category "$category_dir" "$target"; then
            printf 'warning: could not build %s\n' "$target" >&2
            status=1
            continue
        fi

        if ! clean_text "$target"; then
            printf 'warning: clean-up incomplete for %s\n' "$target" >&2
            status=1
        fi
    done

    return "$status"
}

main "$@"