#!/bin/bash
#
# Build a sorted, de-duplicated list of purely alphabetic words from the text
# files found directly inside ./surfaceweb_onetext.
#
# Run from the directory that contains ./surfaceweb_onetext.
#
# Outputs:
#   ./wordlists/temp_surf.txt      every whitespace-separated token that does
#                                  not contain '*', one per line (overwritten)
#   ./wordlists/wordlist_surf.txt  lower-cased tokens consisting only of the
#                                  letters a-z, sorted and unique
#
# Note: GLOBIGNORE is intentionally not set. Every expansion below is quoted,
# so tokens read from the data files are never subject to pathname expansion.

WORDPATH="./wordlists"
DATAPATH="./surfaceweb_onetext"
TEMPTEXT="./wordlists/temp_surf.txt"
ALPHALIST="./wordlists/wordlist_surf.txt"

#######################################
# Append every token of every regular file in a directory to a token file.
# Tokens are split on runs of spaces, tabs and newlines; tokens containing
# a literal '*' are skipped.
# Arguments: $1 - directory holding the input text files
#            $2 - file the tokens are appended to (one token per line)
# Outputs:   a "Working on <name>..." progress line per file on stdout
#######################################
collect_tokens() {
  local data_dir=$1
  local tokens_file=$2
  local path

  # Glob instead of parsing `ls`, so names containing whitespace stay intact.
  for path in "$data_dir"/*; do
    # Skips sub-directories and the literal pattern left by an empty directory.
    [[ -f "$path" ]] || continue
    echo "Working on ${path##*/}..."

    # One awk pass per file replaces a per-word shell loop. Input is fed via
    # redirection so the file name is never interpreted by awk itself.
    LC_ALL=C awk '{
      for (i = 1; i <= NF; i++)
        if (index($i, "*") == 0)
          print $i
    }' < "$path" >> "$tokens_file"
  done
}

#######################################
# Reduce a token file to a sorted, unique list of a-z-only words.
# Arguments: $1 - token file (one token per line)
#            $2 - destination word list (overwritten)
# Returns:   exit status of the final sort stage
#######################################
build_wordlist() {
  local tokens_file=$1
  local out_file=$2

  # LC_ALL=C pins [a-z] to ASCII letters and makes the sort order
  # independent of the caller's locale.
  LC_ALL=C tr 'A-Z' 'a-z' < "$tokens_file" \
    | LC_ALL=C grep -x -E '[a-z]+' \
    | LC_ALL=C sort -u > "$out_file"
}

#######################################
# Entry point: reset the token file, collect tokens, write the word list.
# Globals:   WORDPATH, DATAPATH, TEMPTEXT, ALPHALIST (read)
# Returns:   non-zero if the output directory or files cannot be written
#######################################
main() {
  mkdir -p -- "$WORDPATH" || return
  : > "$TEMPTEXT" || return   # create the token file or truncate an old one

  if [[ -d "$DATAPATH" ]]; then
    collect_tokens "$DATAPATH" "$TEMPTEXT"
  else
    # Best effort, as before: an empty word list is still produced.
    printf 'warning: %s not found; word list will be empty\n' "$DATAPATH" >&2
  fi

  build_wordlist "$TEMPTEXT" "$ALPHALIST" || return
  echo "$ALPHALIST created."
}

main "$@"
