Blame Scripts/Functions/Tuneup/tuneup_doXhtmlHeadings.sh

9f1608
#!/bin/bash
9f1608
#
9f1608
# tuneup_doXhtmlHeadings.sh -- This function transforms html headings
89c744
# to make them accessible (e.g., through a table of contents).  In
89c744
# order for this function to work, you need to put headings in just
89c744
# one line and they must have one of the following formats:
ffdd74
#
7cd8e9
# <h1><a name="">Title</a></h1>
7cd8e9
# <h1><a href="">Title</a></h1>
7cd8e9
# <h1><a name="" href="">Title</a></h1>
ffdd74
#
89c744
# In the above examples, h1 can vary from h1 to h6. Closing tag must
89c744
# be present and match the opening tag. The value of <a name=""> and
89c744
# <a href=""> options are the md5sum of page location, plus the
89c744
# 'head-' string, plus the heading string. If heading title or page
89c744
# location changes, the values of <a name=""> and <a href=""> options
89c744
# will change too.
ffdd74
#
9f5f2e
# Copyright (C) 2009-2011 Alain Reguera Delgado
9f1608
# 
7cd8e9
# This program is free software; you can redistribute it and/or
7cd8e9
# modify it under the terms of the GNU General Public License as
7cd8e9
# published by the Free Software Foundation; either version 2 of the
7cd8e9
# License, or (at your option) any later version.
9f1608
# 
9f1608
# This program is distributed in the hope that it will be useful, but
9f1608
# WITHOUT ANY WARRANTY; without even the implied warranty of
9f1608
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9f1608
# General Public License for more details.
9f1608
#
9f1608
# You should have received a copy of the GNU General Public License
9f1608
# along with this program; if not, write to the Free Software
9f1608
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
9f1608
# USA.
9f1608
# 
9f1608
# ----------------------------------------------------------------------
9f1608
# $Id$
9f1608
# ----------------------------------------------------------------------
9f1608
89c744
# tuneup_doXhtmlHeadings -- Rewrite the one-line headings found in
# $FILE and replace the file's table-of-contents line with a freshly
# generated one.
#
# Globals read:    FILE (path of the file to process; not set here),
#                  FUNCCONFIG (directory holding
#                  output_forHeadingsToc.awk).
# Globals written: FIRST, HEADING, TOCENTRY and TOC are assigned
#                  without a `local' declaration, so they remain
#                  visible to the caller after this function returns.
# Side effects:    $FILE is edited in place through `sed -i'.
#
# NOTE(review): the bare hexadecimal tokens standing alone on their
# own lines throughout this function look like revision identifiers
# left behind by a blame-view export, not shell code -- confirm and
# strip them before running this file.
# NOTE(review): several string literals below (the three anchor
# tests, LINK, the TOC echo strings and the final sed address) look
# truncated, as if markup had been removed from them -- verify each
# against the upstream file.
function tuneup_doXhtmlHeadings {
9f1608
ffdd74
    # Define variables as local to avoid conflicts outside.
ad45de
    local COUNT=0
c49cd9
    local PREVCOUNT=0
ffdd74
    local PATTERN=''
ffdd74
    local -a FINAL
ffdd74
    local -a TITLE
ad45de
    local -a MD5SM
ad45de
    local -a OPTNS
c49cd9
    local -a LEVEL
c49cd9
    local -a PARENT
8aa7eb
    local -a TOCENTRIES
8aa7eb
    local -a LINK
ffdd74
8aa7eb
    # Define html heading regular expression pattern. Use parentheses
79e7c6
    # to save the heading level (\1), the opening anchor tag (\2), and
    # the remaining text up to the closing heading tag (\3).
89c744
    PATTERN="<h([1-6])>(<a.*[^\>]>)(.*[^<])</h[1-6]>"
89c744
89c744
    # Verify list of html files. Are files really html files? If they
89c744
    # aren't, continue with the next one in the list.
    # NOTE(review): there is no enclosing loop inside this function,
    # so `continue' presumably targets a loop in the caller -- confirm
    # this works on the bash version in use.
    # NOTE(review): the right-hand side of `=~' is quoted; bash 3.2
    # and later treat a quoted operand as a literal string rather
    # than a regular expression -- confirm the intended matching.
89c744
    if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
89c744
        continue
89c744
    fi
89c744
89c744
    # Define list of headings to process. When building the heading,
89c744
    # it is required to change spaces characters from its current
89c744
    # decimal output to something different (e.g., its \040 octal
89c744
    # alternative). This is required because the space character is
89c744
    # used as egrep default field separator and spaces can be present
89c744
    # inside heading strings we don't want to separate.
    # The command substitution is unquoted, so the shell splits its
    # output into words; the \040 placeholder keeps each matching
    # line together as one word.
89c744
    for HEADING in $(egrep "$PATTERN" $FILE \
89c744
        | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
89c744
89c744
        # Define previous counter value using current counter
89c744
        # value as reference. On the first iteration both counters
        # are 0, so the first heading is its own "previous" heading.
89c744
        if [[ $COUNT -ne 0 ]];then
89c744
            PREVCOUNT=$(($COUNT-1))
89c744
        fi
89c744
89c744
        # Define initial heading information: FIRST is the heading
        # line with spaces restored, TITLE/OPTNS/LEVEL are the \3, \2
        # and \1 groups of PATTERN, MD5SM is the md5sum of the file
        # path concatenated with the heading line, and PARENT is the
        # level of the previous heading.
        # NOTE(review): MD5SM is not referenced again in the text
        # visible here -- presumably it belonged to the truncated
        # strings below; verify.
89c744
        FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
89c744
        TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
89c744
        MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
89c744
        OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
89c744
        LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
89c744
        PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
89c744
89c744
        # Transform heading information using initial heading
89c744
        # information as reference.
        # NOTE(review): as written, the three tests are identical and
        # every branch assigns an empty string, so OPTNS can only be
        # left unchanged or emptied -- the distinguishing patterns
        # and replacement values appear to be missing; verify.
89c744
        if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
89c744
            OPTNS[$COUNT]=''
89c744
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
89c744
            OPTNS[$COUNT]=''
89c744
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
89c744
            OPTNS[$COUNT]=''
89c744
        fi
ffdd74
89c744
        # Build final html heading structure: opening heading tag,
        # anchor options, title text, closing heading tag.
89c744
        FINAL[$COUNT]='<h'${LEVEL[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
216869
89c744
        # Build html heading link structure. These links are used by
89c744
        # the table of contents later.
        # NOTE(review): as written this is just the title wrapped in
        # two empty strings -- the link markup looks lost; verify.
89c744
        LINK[$COUNT]=''${TITLE[$COUNT]}''
46d906
89c744
        # Build table of contents entry with numerical
89c744
        # identifications. The numerical identification is what we use
89c744
        # to determine the correct position of each heading link on
89c744
        # the table of content. Fields are colon-separated:
        # index:level:parent-level:link.
89c744
        TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
ffdd74
89c744
        # Update heading information inside the current file being
89c744
        # processed. Use the first and final heading information.
        # The original heading text is used as a sed regular
        # expression with `!' as delimiter, so headings containing
        # `!' or regex metacharacters are not matched literally.
89c744
        sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
ffdd74
89c744
        # Increase heading counter.
89c744
        COUNT=$(($COUNT + 1))
8aa7eb
ffdd74
    done
ffdd74
89c744
    # Build the table of contents using heading numerical
89c744
    # identifications as reference. The numerical identification
89c744
    # describes the order of headings in one html file. This
89c744
    # information is processed by awk to make the appropriate
89c744
    # replacements. Finally, the result is stored in the TOC
89c744
    # variable. Only the output of the `for' loop is piped through
    # awk; the echo commands before it write straight into the
    # command substitution.
89c744
    TOC=$(echo '
'
89c744
        echo "

`gettext "Table of contents"`

"
89c744
        for TOCENTRY in "${TOCENTRIES[@]}";do
89c744
            echo $TOCENTRY
89c744
        done \
89c744
            | awk -f ${FUNCCONFIG}/output_forHeadingsToc.awk)
89c744
89c744
    # Update table of contents inside the current file being
89c744
    # processed. The sed `c' command replaces every line matching the
    # address with the contents of TOC. `[^<\/div]' is a bracket
    # expression (any single character other than those listed), not
    # a negated string, and $TOC is expanded unquoted before being
    # handed to `echo -e'.
89c744
    sed -i -r '/
[^<\/div].*<\/div>/c'"$(echo -e $TOC)" $FILE
89c744
89c744
    # Reset counters.
89c744
    COUNT=0
89c744
    PREVCOUNT=0
89c744
89c744
    # Clean up variables to receive the next file. FIRST and TOC are
    # not in this list.
89c744
    unset FINAL
89c744
    unset TITLE
89c744
    unset MD5SM
89c744
    unset OPTNS
89c744
    unset LEVEL
89c744
    unset PARENT
89c744
    unset TOCENTRIES
89c744
    unset LINK
89c744
9f1608
}