Blame Scripts/Bash/Functions/Html/html_updateHeadings.sh

9f1608
#!/bin/bash
9f1608
#
9f1608
# html_updateHeadings.sh -- This function transforms html headings to
ad45de
# make them accessible (e.g., through a table of contents).
9f1608
#
ffdd74
#   - In order for this function to work, you need to put headings in
ad45de
#     just one line and they must have the following formats:
ffdd74
#
ad45de
#       <h1><a name="">Title</a></h1>
ad45de
#       <h1><a href="">Title</a></h1>
ad45de
#       <h1><a name="" href="">Title</a></h1>
ffdd74
#
ad45de
#     In the above examples, h1 stands for any level from h1 to h6. The
a9264e
#     closing tag must be present and match the opening tag. The value of
ad45de
#     <a name=""> and <a href=""> options are the md5sum of page
ad45de
#     location, plus the 'head-' string, plus the heading string. If
ad45de
#     heading title or page location changes, the values of 
ad45de
#     <a name=""> and <a href=""> options will change too.
ffdd74
#
9f1608
# Copyright (C) 2009-2010 Alain Reguera Delgado
9f1608
# 
9f1608
# This program is free software; you can redistribute it and/or modify
9f1608
# it under the terms of the GNU General Public License as published by
9f1608
# the Free Software Foundation; either version 2 of the License, or
9f1608
# (at your option) any later version.
9f1608
# 
9f1608
# This program is distributed in the hope that it will be useful, but
9f1608
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
9f1608
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
9f1608
# USA.
9f1608
# 
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
9f1608
# html_updateHeadings -- Rewrite the headings found in html files and
# refresh the table of contents of each processed file.
#
# Globals read:  OPTIONVAL (either a directory that is searched for
#                .html/.htm files, or a single file).
# Calls:         cli_printMessage (not defined in this function), find,
#                file, egrep, sed, md5sum, awk and gettext.
# Side effects:  every processed file is edited in place with `sed -i'.
#                FILE, HEADING, FIRST, LINK, TOCENTRIES, TOCENTRY and
#                TOC are assigned without being declared `local', so
#                they stay set after the function returns.
function html_updateHeadings {
9f1608
ffdd74
    # Define variables as local to avoid conflicts outside.
ffdd74
    local FILES=''
ad45de
    local COUNT=0
c49cd9
    local PREVCOUNT=0
ffdd74
    local PATTERN=''
ffdd74
    local -a FINAL
ffdd74
    local -a TITLE
ad45de
    local -a MD5SM
ad45de
    local -a OPTNS
c49cd9
    local -a LEVEL
c49cd9
    local -a PARENT
ffdd74
ffdd74
    # Define list of html files to process using option value as
ffdd74
    # reference. When OPTIONVAL is neither a directory nor a regular
    # file, FILES stays empty and the loop below does nothing.
ffdd74
    if [[ -d $OPTIONVAL ]];then
ffdd74
        FILES=$(find $OPTIONVAL -regextype posix-egrep -type f -regex '.*/*.(html|htm)$')
ffdd74
    elif [[ -f $OPTIONVAL ]];then
ffdd74
        FILES=$OPTIONVAL
ffdd74
    fi
ffdd74
ffdd74
    # FILES is expanded unquoted here, so the list is split on
    # whitespace: paths containing spaces are not handled.
    for FILE in $FILES;do
ffdd74
ffdd74
        # Verify list of html files. Are they really html files? If
ffdd74
        # they are not, continue with the next one in the list.
        # NOTE(review): the pattern is quoted; bash 3.2 and later match
        # a quoted right-hand side of =~ literally, not as a regular
        # expression -- confirm which bash version this targets.
ffdd74
        if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
ffdd74
            continue
ffdd74
        fi
ffdd74
ffdd74
        # Output action message.
a9264e
        cli_printMessage $FILE 'AsUpdatingLine'
c49cd9
c49cd9
        # Define html heading regular expression. Use parentheses to
c49cd9
        # save the heading level digit (group 1), the <a ...> tag that
        # follows the opening heading tag (group 2), and the remaining
        # text up to the closing heading tag (group 3).
c49cd9
        PATTERN="<h([1-9])>(<a.*[^\>]>)(.*[^<])</h[1-9]>"
c49cd9
c49cd9
        # Define list of headings to process. When building the
c49cd9
        # heading, it is required to change spaces characters from its
a9264e
        # current output form to something different (e.g., its \040
c49cd9
        # octal alternative). This is required because the unquoted
c49cd9
        # command substitution below is split into words on whitespace
c49cd9
        # by the shell, and spaces can be present inside heading
c49cd9
        # strings we don't want to separate.
c49cd9
        for HEADING in $(egrep "$PATTERN" $FILE \
a9264e
            | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
c49cd9
c49cd9
            # Define previous counter value using current counter
c49cd9
            # value as reference. For the first heading PREVCOUNT
            # stays equal to COUNT (both 0).
c49cd9
            if [[ $COUNT -ne 0 ]];then
c49cd9
                PREVCOUNT=$(($COUNT-1))
c49cd9
            fi
ffdd74
c49cd9
            # Define initial heading information:
            #   FIRST  - heading line with its spaces restored.
            #   TITLE  - pattern group 3.
            #   MD5SM  - md5sum of the file path joined with FIRST.
            #   OPTNS  - pattern group 2.
            #   LEVEL  - pattern group 1.
            #   PARENT - LEVEL of the previous heading (of the heading
            #            itself when it is the first one).
a9264e
            FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
c49cd9
            TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
c49cd9
            MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
c49cd9
            OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
c49cd9
            LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
c49cd9
            PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
c49cd9
c49cd9
            # Transform heading information using initial heading
c49cd9
            # information as reference.
            # NOTE(review): as displayed here the three conditions and
            # the three assignments are identical; markup inside these
            # literals may have been lost -- verify against the
            # repository copy before relying on this section.
c49cd9
            if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
c49cd9
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
c49cd9
                OPTNS[$COUNT]=''
c49cd9
            elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
c49cd9
                OPTNS[$COUNT]=''
ad45de
            fi
ad45de
c49cd9
            # Build final html heading structure.
c49cd9
            FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
ffdd74
c49cd9
            # Build html heading link structure.
c49cd9
            LINK[$COUNT]=''${TITLE[$COUNT]}''
ffdd74
c49cd9
            # Build table of contents entry with numerical
c49cd9
            # identifications. Fields are colon-separated:
            # index:level:parent-level:link.
c49cd9
            TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
ffdd74
c49cd9
            # Update heading information using the first and last
c49cd9
            # heading structures. FIRST is passed to sed as an
            # extended regular expression with `!' as delimiter.
            # NOTE(review): headings containing `!' or regular
            # expression metacharacters are not escaped -- confirm
            # this is acceptable for the expected input.
a9264e
            sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
ad45de
c49cd9
            # Increase heading counter.
c49cd9
            COUNT=$(($COUNT + 1))
ad45de
c49cd9
        done
ad45de
c49cd9
        # Use awk to build the table of contents. Each TOCENTRIES
        # element is printed on its own line and read by awk with `:'
        # as field separator; the awk program compares the entry index
        # ($1), level ($2) and parent level ($3) to choose the text
        # printed around the link ($4).
a9264e
        TOC=$(echo '
'
a9264e
            echo "

`gettext "Table of contents"`

"
a9264e
            for TOCENTRY in "${TOCENTRIES[@]}";do
a9264e
                echo $TOCENTRY
a9264e
            done \
a9264e
                | awk 'BEGIN {FS=":"}
a9264e
                         {
a9264e
                         if ($1 == 0 && $2 == $3) { 
a9264e
                            opentags  = "
  • "
a9264e
                            closetags = ""
a9264e
                            }
a9264e
a9264e
                         if ($1 >  0 && $2 >  $3) {
a9264e
                            opentags  = "
  • "
a9264e
                            closetags = ""
a9264e
                            }
a9264e
a9264e
                         if ($1 >  0 && $2 == $3) { 
a9264e
                            opentags  = "
  • "
  • a9264e
                                closetags = ""
    a9264e
                                }
    a9264e
    a9264e
                             if ($1 >  0 && $2 <  $3) { 
    a9264e
                                    opentags = ""
    a9264e
                                for (i = 1; i <= ($3 - $2); i++) {
    a9264e
                                    opentags  = opentags ""
    a9264e
                                    closetags = ""
    a9264e
                                    }
    a9264e
                                    opentags = opentags "
  • "
  • a9264e
                                }
    a9264e
    a9264e
                             printf "%s%s%s\n",opentags,$4,closetags
    a9264e
                             }
    a9264e
    a9264e
                         END {
    a9264e
                             if ($1 > 0 && $2 >= $3 && $3 > 1) {
    a9264e
                                for (i = 1; i <= $3; i++) {
    a9264e
                                    print ""
    a9264e
                                }
    a9264e
                             }
    a9264e
                             if ($1 > 0 && $2 >= $3 && $3 == 1) {
    a9264e
                                    print ""
    a9264e
                                    print ""
    a9264e
                             }
    a9264e
                             if ($1 > 0 && $2 < $3) {
    a9264e
                                for (i = 1; i <= $2; i++) {
    a9264e
                                    print ""
    a9264e
                                }
    a9264e
                             }
    a9264e
                             print ""
    a9264e
                             }')
    a9264e
    a9264e
    a9264e
            # Update file's table of contents. The sed `c' command
            # replaces the line selected by the address with the text
            # produced by `echo -e $TOC'.
    a9264e
            sed -i -r '/
    (.*)<\/div>/c'"$(echo -e $TOC)" $FILE
    a9264e
    a9264e
            # Reset counters.
            # NOTE(review): TOCENTRIES is not cleared here, so entries
            # left by a previous file with more headings would still be
            # present for the next file -- confirm whether intended.
    a9264e
            COUNT=0
    a9264e
            PREVCOUNT=0
    ffdd74
    ffdd74
        done
    ffdd74
    9f1608
    }