Blame Scripts/Bash/Functions/Html/html_updateHeadings.sh

9f1608
#!/bin/bash
9f1608
#
9f1608
# html_updateHeadings.sh -- This function transforms html headings to
ad45de
# make them accessible (e.g., through a table of contents).
9f1608
#
ffdd74
#   - In order for this function to work, you need to put headings in
ad45de
#     just one line and they must have the following formats:
ffdd74
#
ad45de
#       <h1><a name="">Title</a></h1>
ad45de
#       <h1><a href="">Title</a></h1>
ad45de
#       <h1><a name="" href="">Title</a></h1>
ffdd74
#
ad45de
#     In the above examples, h1 alternates from h1 to h6. Closing tag
ad45de
#     must be present and match the one opening. The value of 
ad45de
#     <a name=""> and <a href=""> options are the md5sum of page
ad45de
#     location, plus the 'head-' string, plus the heading string. If
ad45de
#     heading title or page location changes, the values of 
ad45de
#     <a name=""> and <a href=""> options will change too.
ffdd74
#
9f1608
# Copyright (C) 2009-2010 Alain Reguera Delgado
9f1608
# 
9f1608
# This program is free software; you can redistribute it and/or modify
9f1608
# it under the terms of the GNU General Public License as published by
9f1608
# the Free Software Foundation; either version 2 of the License, or
9f1608
# (at your option) any later version.
9f1608
# 
9f1608
# This program is distributed in the hope that it will be useful, but
9f1608
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
9f1608
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
9f1608
# USA.
9f1608
# 
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
9f1608
# html_updateHeadings -- Scan html files for one-line headings of the
# form <hN><a ...>Title</hN> and print one table-of-contents line per
# heading found.
#
# Globals:
#   OPTIONVAL (read) -- path to a single file, or to a directory which
#                       is searched recursively for *.html and *.htm
#                       files. Any other value processes nothing.
# Outputs:
#   For each processed file, one line per heading on standard output
#   holding the link text built for that heading.
function html_updateHeadings {

    # Define variables as local to avoid conflicts outside.
    local FILE=''
    local HEADING=''
    local TOC=''
    local CHECKSUM=''
    local COUNT=0
    local PREVCOUNT=0
    local -a FILES=()
    local -a FIRST
    local -a FINAL
    local -a TITLE
    local -a MD5SM
    local -a OPTNS
    local -a LEVEL
    local -a PARENT
    local -a LINK
    local -a TOCENTRIES

    # Define regular expression used to recognize html files from the
    # output of file(1). It is kept in a variable because a quoted
    # right-hand side of `=~' is matched as a literal string, not as a
    # regular expression.
    local FILETYPES='^(XHTML|HTML|XML)'

    # Define html heading regular expression. Use parentheses to save
    # heading level (1), anchor opening tag (2), and heading title
    # (3). The expression does not change between files, so it is
    # defined once here.
    local PATTERN='<h([1-9])>(<a.*[^\>]>)(.*[^<])</h[1-9]>'

    # Define list of html files to process using option value as
    # reference. File names are read NUL-delimited into an array so
    # paths holding spaces stay in one piece.
    if [[ -d $OPTIONVAL ]];then
        while IFS= read -r -d '' FILE;do
            FILES+=("$FILE")
        done < <(find "$OPTIONVAL" -regextype posix-egrep -type f \
            -regex '.*\.(html|htm)$' -print0)
    elif [[ -f $OPTIONVAL ]];then
        FILES=("$OPTIONVAL")
    fi

    for FILE in "${FILES[@]}";do

        # Verify list of html files. Are they really html files? If
        # they are not, continue with the next one in the list.
        if [[ ! $(file --brief "$FILE") =~ $FILETYPES ]];then
            continue
        fi

        # Output action message.
        #cli_printMessage "$FILE" 'AsUpdatingLine'

        # Reset heading counter and heading information, so entries
        # collected from a previous file are never printed again.
        COUNT=0
        PREVCOUNT=0
        FIRST=()
        FINAL=()
        TITLE=()
        MD5SM=()
        OPTNS=()
        LEVEL=()
        PARENT=()
        LINK=()
        TOCENTRIES=()

        # Process headings one line at a time. Reading whole lines
        # keeps the spaces inside heading strings untouched, without
        # any need to encode them first.
        while IFS= read -r HEADING;do

            # Remove leading spaces from heading line.
            HEADING=${HEADING#"${HEADING%%[![:space:]]*}"}

            # Split heading into its components. Lines egrep selected
            # but bash cannot match are ignored.
            if [[ ! $HEADING =~ $PATTERN ]];then
                continue
            fi

            # Define previous counter value using current counter
            # value as reference. The first heading has no previous
            # one, so it points to itself.
            if [[ $COUNT -ne 0 ]];then
                PREVCOUNT=$((COUNT - 1))
            else
                PREVCOUNT=0
            fi

            # Define initial heading information.
            FIRST[$COUNT]=$HEADING
            LEVEL[$COUNT]=${BASH_REMATCH[1]}
            OPTNS[$COUNT]=${BASH_REMATCH[2]}
            TITLE[$COUNT]=${BASH_REMATCH[3]}
            CHECKSUM=$(printf '%s\n' "${FILE}${FIRST[$COUNT]}" | md5sum)
            MD5SM[$COUNT]=${CHECKSUM%%[[:space:]]*}
            PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}

            # NOTE(review): the anchor rewriting that used to follow
            # here was reduced to three identical branches which left
            # OPTNS unchanged; the patterns and replacement markup
            # look like they were lost. They presumably built the
            # 'head-' plus md5sum anchor -- restore from history.

            # Build final html heading structure.
            FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'

            # Build html heading link structure.
            # NOTE(review): only the title is left here; the link
            # markup wrapping it appears to be missing -- confirm.
            LINK[$COUNT]=${TITLE[$COUNT]}

            # Build table of contents entry with numerical
            # identifications.
            TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"

            # Update heading information using the first and last
            # heading structures. This step is disabled; FINAL is
            # only computed for it.
            #sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" "$FILE"

            # Increase heading counter.
            COUNT=$((COUNT + 1))

        done < <(grep -E -- "$PATTERN" "$FILE")

        # Use awk to build the table of content. The first three
        # colon-separated fields (counter, level, parent) are removed
        # and the rest is printed as is, so colons inside the heading
        # text do not truncate it.
        for TOC in "${TOCENTRIES[@]}";do
            printf '%s\n' "$TOC"
        done \
            | awk '{sub(/^[^:]*:[^:]*:[^:]*:/, ""); print}'

    done

}