#!/bin/bash

# Names of the external spelling tools.  ASPELL is what the build steps
# invoke; HUNSPELL is not referenced by any active command in this script.
ASPELL=aspell
HUNSPELL=hunspell

# Root of the SCOWL tree: a non-empty value inherited from the environment
# wins, otherwise fall back to the parent directory.
SCOWL=${SCOWL:-..}

# Word-list size handed to mk-list for the standard dictionaries.
SIZE=60

# Stop at the first command that fails.
set -e

# Force the C locale for every category so that sort and comm agree on a
# plain byte-wise ordering.
export LANG=C LC_ALL=C LC_CTYPE=C LC_COLLATE=C

# Run SCOWL's mk-list program against the "final" word-list directory,
# forwarding every argument unchanged.  The word list is written to stdout.
# The SCOWL paths are quoted so that a SCOWL directory containing whitespace
# or glob characters is passed through intact.
mk-list() {
  "$SCOWL/mk-list" -d "$SCOWL/final" "$@"
}

# Create, in the current directory, the helper files the dictionary builds
# rely on:
#   nosug          sorted, de-duplicated union of SCOWL's offensive.1,
#                  offensive.2 and profane.1 lists
#   eng_affix.dat  copy of SCOWL's speller/en.aff
#   eng.dat        aspell language description named "eng" that refers to
#                  that affix file
# Globals read: SCOWL
prep() {
  echo prep

  # Let sort open the lists itself: a missing or unreadable list then makes
  # this command fail (and, under set -e, stops the script) instead of being
  # hidden behind the exit status of the last stage of a cat | sort pipeline.
  sort -u "$SCOWL"/misc/{offensive.1,offensive.2,profane.1} > nosug

  cp "$SCOWL/speller/en.aff" eng_affix.dat

  cat << 'EOF' > eng.dat
name eng
charset iso8859-1
special ' -*-
affix eng
EOF
}

# Build one Hunspell dictionary and package it as a zip archive.
#
# Arguments:
#   $1  dictionary name; base name of every generated file
#   $2  shell command (run through eval) that writes the word list to stdout
#   $3  optional file whose contents are appended to the README in place of
#       the "Wordlist Command" line; when given, the hunspell/ copy and the
#       plain word-list archive are not produced
# Globals read: ASPELL, SCOWL, SCOWL_VERSION
# Files expected in the current directory: nosug, and the aspell "eng"
#   language files referenced through "-l ./eng".
# Files written: $1.0 $1-nosug.1 $1.1 $1.2 $1.dic $1.aff README_$1.txt
#   hunspell-<fn>.zip, and without $3 also $1.tocheck $1.txt README.txt
#   hunspell/hunspell-<fn>.zip hunspell/wordlist-<fn>.zip
doit() {
  local fn line_count

  echo "creating $1.dic"

  eval "$2" | sort -u > "$1.0"

  # Split the list into words that also appear in nosug and the rest.
  # Options come before the operands; trailing options only work with
  # implementations that permute their arguments.
  comm -12 "$1.0" nosug > "$1-nosug.1"
  comm -23 "$1.0" nosug > "$1.1"

  # Affix-compress both halves with aspell, dropping lines that start with
  # XXX or >>>.  Only the nosug half goes through add-no-suggest.
  # $ASPELL is left unquoted so a value carrying extra options still splits
  # into separate words.
  $ASPELL -l ./eng munch-list < "$1-nosug.1" \
    | grep -v '^\(XXX\|>>>\)' \
    | "$SCOWL/speller/add-no-suggest" > "$1.2"

  # NOTE: grep is the last stage here and exits 1 when it selects no lines,
  # so when the script runs with set -e an empty result aborts the build.
  $ASPELL -l ./eng munch-list < "$1.1" \
    | grep -v '^\(XXX\|>>>\)' >> "$1.2"

  cat "$SCOWL/speller/en.dic.supp" >> "$1.2"

  # The .dic starts with the entry count.  Read the file on stdin so wc
  # prints only the number, and strip the blank padding some wc
  # implementations add (which made the old cut -d' ' -f1 yield nothing).
  line_count=$(wc -l < "$1.2")
  printf '%s\n' "${line_count//[[:space:]]/}" > "$1.dic"
  sort "$1.2" >> "$1.dic"

  cp "$SCOWL/speller/en.aff" "$1.aff"

  # Archive base name: the dictionary name, plus "-<version>" when
  # SCOWL_VERSION is set and non-empty.
  fn=$1${SCOWL_VERSION:+-$SCOWL_VERSION}

  WHAT="$1 Hunspell Dictionary" sh "$SCOWL/speller/README_en.txt.sh" > "README_$1.txt"
  if [ -z "$3" ]; then
    printf '%s\n' "Wordlist Command: $2" >> "README_$1.txt"
  else
    cat "$3" >> "README_$1.txt"
  fi

  # Remove any previous archive first: zip would otherwise update it in place.
  rm -f "hunspell-$fn.zip"
  zip -9 "hunspell-$fn.zip" "README_$1.txt" "$1.dic" "$1.aff"

  if [ -z "$3" ]; then
    mkdir -p hunspell
    cp "hunspell-$fn.zip" hunspell/

    # Plain word list (both halves merged), converted to UTF-8 with DOS line
    # endings.  It could also be fed back to hunspell as a check, e.g.:
    #   hunspell -l -d ./$1 < $1.tocheck > misspelled
    sort -u "$1-nosug.1" "$1.1" > "$1.tocheck"
    iconv -f iso-8859-1 -t utf-8 < "$1.tocheck" | unix2dos > "$1.txt"

    cat <<EOF > README.txt
This zip file contains the words found in the corresponding Hunspell
dictionary.  See the file README_$1.txt.
EOF
    # Rebuild from scratch, consistent with the dictionary archive above.
    rm -f "hunspell/wordlist-$fn.zip"
    zip -9 "hunspell/wordlist-$fn.zip" README.txt "README_$1.txt" "$1.txt"
  fi
}

# Entry point.
#   -all                          build the en_US, en_CA, en_GB-ize and
#                                 en_GB-ise dictionaries at size $SIZE, the
#                                 three "-large" (size 70) dictionaries, and
#                                 hunspell/README
#   -one <dict-name> <parms file> build a single dictionary; the word list is
#                                 read from stdin (the list command is "cat")
#                                 and <parms file> is appended to its README
# Anything else prints the usage line on stderr and exits with status 1.
# prep runs only once the arguments are known to be valid, so a bad
# invocation leaves no files behind.
if [ "$1" = "-all" ]; then

  prep

  # Filter stdin through SCOWL's src/deaccent program.
  deaccent() {
    "$SCOWL/src/deaccent"
  }

  doit en_US "mk-list en_US $SIZE | deaccent"
  doit en_CA "mk-list en_CA $SIZE | deaccent"
  doit en_GB-ize "mk-list en_GB-ize $SIZE | deaccent"
  doit en_GB-ise "mk-list en_GB-ise $SIZE | deaccent"

  # Copy stdin to stdout unchanged, then emit the same input again after
  # passing it through deaccent.  The input is buffered in a temporary file
  # in the current directory, which is removed even when a step fails.
  with-and-without-accents() {
    local buffered rc=0
    buffered=$(mktemp tmp-XXXXXX) || return
    { tee "$buffered" && deaccent < "$buffered"; } || rc=$?
    rm -f -- "$buffered"
    return "$rc"
  }

  doit en_US-large "mk-list -v1 en_US 70 | with-and-without-accents "
  doit en_CA-large "mk-list -v1 en_CA 70 | with-and-without-accents "
  doit en_GB-large "mk-list -v1 en_GB-ize en_GB-ise 70 | with-and-without-accents"

  sh "$SCOWL/speller/README_en.txt.sh" > hunspell/README

elif [ "$1" = "-one" ] && [ -n "$2" ] && [ -n "$3" ]; then

  prep

  doit "$2" "cat" "$3"

else

  # Invalid invocation: report on stderr and signal failure to the caller.
  echo "usage: $0 -all | -one <dict-name> <parms file>" >&2
  exit 1

fi

#rm eng*.dat nosug en_US*.? en_CA*.?
