Blame Scripts/Bash/Functions/Tuneup/Xhtml/xhtml_doToc.sh

878a2b
#!/bin/bash
878a2b
# 
878a2b
# xhtml_doToc.sh -- This functionality transforms web page headings to
878a2b
# make them accessible through a table of contents.  The table of
878a2b
# contents is expanded in place, wherever the table of contents <div>
878a2b
# piece of code is in the page.  Once the table of contents <div>
878a2b
# piece of code has been expanded, there is no need to put anything else
878a2b
# in the page.
878a2b
#
878a2b
# In order for the tuneup functionality to transform headings, you
878a2b
# need to put headings in just one line using one of the following
878a2b
# forms:
878a2b
#
878a2b
# <h1><a name="">Title</a></h1>
878a2b
# <h1><a href="">Title</a></h1>
878a2b
# <h1><a name="" href="">Title</a></h1>
878a2b
#
878a2b
# In the example above, h1 can vary from h1 to h6. Closing tag must be
878a2b
# present and also match the opening tag. The value of `name' and
878a2b
# `href' options from the anchor element are set dynamically using the
878a2b
# md5sum output of combining the page location, the head- string and
878a2b
# the heading string.
878a2b
#
03486a
# Copyright (C) 2009, 2010, 2011, 2012 The CentOS Project
878a2b
#
878a2b
# This program is free software; you can redistribute it and/or modify
878a2b
# it under the terms of the GNU General Public License as published by
878a2b
# the Free Software Foundation; either version 2 of the License, or (at
878a2b
# your option) any later version.
878a2b
#
878a2b
# This program is distributed in the hope that it will be useful, but
878a2b
# WITHOUT ANY WARRANTY; without even the implied warranty of
878a2b
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
878a2b
# General Public License for more details.
878a2b
#
878a2b
# You should have received a copy of the GNU General Public License
878a2b
# along with this program; if not, write to the Free Software
878a2b
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
878a2b
#
878a2b
# ----------------------------------------------------------------------
878a2b
# $Id$
878a2b
# ----------------------------------------------------------------------
878a2b
878a2b
function xhtml_doToc {
    # Rewrite the one-line headings found in $FILE and replace the
    # table of contents placeholder in that same file with a generated
    # table of contents.
    #
    # Reads (from the caller's environment):
    #   FILE               - path of the file being processed; it is
    #                        modified in place with `sed -i'.
    #   TUNEUP_CONFIG_DIR  - directory that holds toc.awk.
    # Calls:
    #   cli_checkFiles     - defined elsewhere; used here to verify
    #                        the toc.awk file.
    # Writes:
    #   FIRST, TOC         - assigned without `local', so they remain
    #                        visible to the caller after the function
    #                        returns.
    #
    # NOTE(review): this text looks like it was taken from a blame
    # view; the bare commit-hash lines interleaved with the code are
    # not shell statements and should be confirmed against the
    # repository copy of this file.
878a2b
878a2b
    # Define variables as local to avoid conflicts outside.
    # NOTE(review): FIRST is used as an array below but is neither
    # declared local here nor unset at the end -- confirm whether
    # that is intended.
878a2b
    local COUNT=0
878a2b
    local PREVCOUNT=0
878a2b
    local -a FINAL
878a2b
    local -a TITLE
878a2b
    local -a MD5SM
878a2b
    local -a OPTNS
878a2b
    local -a CLASS
878a2b
    local -a LEVEL
878a2b
    local -a PARENT
878a2b
    local -a TOCENTRIES
878a2b
    local -a LINK
878a2b
878a2b
    # Define table of contents configuration file, the awk script
878a2b
    # used to produce the table of contents XHTML output code.
878a2b
    local TOC_CONFIG=${TUNEUP_CONFIG_DIR}/toc.awk
878a2b
878a2b
    # Verify table of contents configuration file.
878a2b
    cli_checkFiles ${TOC_CONFIG}
878a2b
878a2b
    # Define html heading regular expression pattern. Use parentheses
878a2b
    # to save, in this order: (1) the heading level, (2) whatever
    # follows the level inside the opening heading tag, (3) the
    # opening anchor tag, and (4) the heading title.
878a2b
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</h[1-6]>'
878a2b
878a2b
    # Verify the file type. Is the file really an html file? If it
878a2b
    # isn't, continue with the next one in the list.
    # NOTE(review): `continue' is used although no loop is visible
    # inside this function; presumably it relies on a loop in the
    # caller -- confirm this works with the bash version in use.
    # NOTE(review): the right-hand side of `=~' is quoted, which
    # recent bash versions match as a literal string rather than as
    # a regular expression -- confirm.
878a2b
    if [[ ! $(file --brief $FILE) =~ '^(XHTML|HTML|XML)' ]];then
878a2b
        continue
878a2b
    fi
878a2b
878a2b
    # Define list of headings to process. When building the heading,
878a2b
    # it is required to change space characters from their current
878a2b
    # form to something different (e.g., the \040 octal
878a2b
    # alternative). This is required because the unquoted command
878a2b
    # substitution below is split into words on whitespace, and spaces
878a2b
    # can be present inside heading strings we don't want to separate.
878a2b
    for HEADING in $(egrep "$PATTERN" $FILE \
878a2b
        | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
878a2b
878a2b
        # Define previous counter value using current counter
878a2b
        # value as reference. For the first heading both counters
        # are 0.
878a2b
        if [[ $COUNT -ne 0 ]];then
878a2b
            PREVCOUNT=$(($COUNT-1))
878a2b
        fi
878a2b
878a2b
        # Define initial heading information. FIRST is the heading
        # line with its spaces restored; TITLE, OPTNS, CLASS and LEVEL
        # are PATTERN groups 4, 3, 2 and 1 of that line; MD5SM is the
        # md5sum of the file path followed by the heading line, with
        # the trailing ` -' of the md5sum output removed; PARENT is
        # the level of the previous heading.
878a2b
        FIRST[$COUNT]=$(echo $HEADING | sed -r "s!\\\040! !g")
878a2b
        TITLE[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\4!")
878a2b
        MD5SM[$COUNT]=$(echo "${FILE}${FIRST[$COUNT]}" | md5sum | sed -r 's![[:space:]]+-$!!')
878a2b
        OPTNS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\3!")
878a2b
        CLASS[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\2!")
878a2b
        LEVEL[$COUNT]=$(echo ${FIRST[$COUNT]} | sed -r "s!$PATTERN!\1!")
878a2b
        PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}
878a2b
878a2b
        # Transform heading information using initial heading
878a2b
        # information as reference.
        # NOTE(review): as written, the three branches below test the
        # same pattern and assign the same empty value; it looks like
        # text was lost from these literals -- confirm against the
        # repository copy before relying on this logic.
878a2b
        if [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
878a2b
            OPTNS[$COUNT]=''
878a2b
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then 
878a2b
            OPTNS[$COUNT]=''
878a2b
        elif [[ ${OPTNS[$COUNT]} =~ '^$' ]];then
878a2b
            OPTNS[$COUNT]=''
878a2b
        fi
878a2b
878a2b
        # Build final html heading structure from the level, the
        # heading attributes, the anchor tag and the title.
878a2b
        FINAL[$COUNT]='<h'${LEVEL[$COUNT]}${CLASS[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</h'${LEVEL[$COUNT]}'>'
878a2b
878a2b
        # Build html heading link structure. These links are used by
878a2b
        # the table of contents later.
        # NOTE(review): as written this is just the title wrapped in
        # empty strings and MD5SM is not used; the surrounding markup
        # was presumably lost -- confirm.
878a2b
        LINK[$COUNT]=''${TITLE[$COUNT]}''
878a2b
878a2b
        # Build table of contents entry with numerical
878a2b
        # identifications. The numerical identification is what we use
878a2b
        # to determine the correct position of each heading link on
878a2b
        # the table of contents. Fields are colon-separated:
        # counter, level, parent level and link.
878a2b
        TOCENTRIES[$COUNT]="$COUNT:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"
878a2b
878a2b
        # Update heading information inside the current file being
878a2b
        # processed. Use the first and final heading information.
        # Note that the original heading text is used as the sed
        # regular expression and `!' is the delimiter.
878a2b
        sed -i -r "s!${FIRST[$COUNT]}!${FINAL[$COUNT]}!" $FILE
878a2b
878a2b
        # Increase heading counter.
878a2b
        COUNT=$(($COUNT + 1))
878a2b
878a2b
    done
878a2b
878a2b
    # Build the table of contents using heading numerical
878a2b
    # identifications as reference. The numerical identification
878a2b
    # describes the order of headings in one xhtml file. This
878a2b
    # information is processed by awk to make the appropriate
878a2b
    # replacements. Finally, the result is stored in the TOC
878a2b
    # variable.
    # NOTE(review): TOC is not declared local, and the echoed
    # literals below appear to have lost their markup -- confirm.
878a2b
    TOC=$(echo '
'
878a2b
        echo "

`gettext "Table of contents"`

"
878a2b
        for TOCENTRY in "${TOCENTRIES[@]}";do
878a2b
            echo $TOCENTRY
878a2b
        done \
878a2b
            | awk -f ${TOC_CONFIG})
878a2b
878a2b
    # Update table of contents inside the current file being
878a2b
    # processed. The sed `c' command replaces each matching line
    # with the content of TOC.
    # NOTE(review): the address pattern below is split across two
    # lines and its beginning looks truncated -- confirm.
878a2b
    sed -i -r '/
[^<\/div].*<\/div>/c'"$(echo -e $TOC)" $FILE
878a2b
878a2b
    # Clean up variables to receive the next file.
878a2b
    unset FINAL
878a2b
    unset TITLE
878a2b
    unset MD5SM
878a2b
    unset OPTNS
878a2b
    unset CLASS
878a2b
    unset LEVEL
878a2b
    unset PARENT
878a2b
    unset TOCENTRIES
878a2b
    unset LINK
878a2b
878a2b
}