Blame Automation/Modules/Tuneup/Xhtml/xhtml_doToc.sh

Alain Reguera Delgado 8f60cb
#!/bin/bash
Alain Reguera Delgado 8f60cb
# 
Alain Reguera Delgado 8f60cb
# xhtml_doToc.sh -- This functionality transforms web page headings to
Alain Reguera Delgado 8f60cb
# make them accessible through a table of contents.  The table of
Alain Reguera Delgado 8f60cb
# contents is expanded in place, wherever the <div class="toc">...</div>
Alain Reguera Delgado 8f60cb
# piece of code is in the page.  Once the table of contents
Alain Reguera Delgado 8f60cb
# piece of code has been expanded, there is no need to put anything else
Alain Reguera Delgado 8f60cb
# in the page.
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# In order for the tuneup functionality to transform headings, you
Alain Reguera Delgado 8f60cb
# need to put headings in just one line using one of the following
Alain Reguera Delgado 8f60cb
# forms:
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# <h1><a name="">Title</a></h1>
Alain Reguera Delgado 8f60cb
# <h1><a href="">Title</a></h1>
Alain Reguera Delgado 8f60cb
# <h1><a name="" href="">Title</a></h1>
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# In the example above, h1 can vary from h1 to h6. Closing tag must be
Alain Reguera Delgado 8f60cb
# present and also match the opening tag. The value of `name' and
Alain Reguera Delgado 8f60cb
# `href' options from the anchor element are set dynamically using the
Alain Reguera Delgado 8f60cb
# md5sum output of combining the page location, the head- string and
Alain Reguera Delgado 8f60cb
# the heading string.
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# Copyright (C) 2009-2013 The CentOS Project
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# This program is free software; you can redistribute it and/or modify
Alain Reguera Delgado 8f60cb
# it under the terms of the GNU General Public License as published by
Alain Reguera Delgado 8f60cb
# the Free Software Foundation; either version 2 of the License, or (at
Alain Reguera Delgado 8f60cb
# your option) any later version.
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# This program is distributed in the hope that it will be useful, but
Alain Reguera Delgado 8f60cb
# WITHOUT ANY WARRANTY; without even the implied warranty of
Alain Reguera Delgado 8f60cb
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Alain Reguera Delgado 8f60cb
# General Public License for more details.
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# You should have received a copy of the GNU General Public License
Alain Reguera Delgado 8f60cb
# along with this program; if not, write to the Free Software
Alain Reguera Delgado 8f60cb
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Alain Reguera Delgado 8f60cb
#
Alain Reguera Delgado 8f60cb
# ----------------------------------------------------------------------
Alain Reguera Delgado 8f60cb
# $Id$
Alain Reguera Delgado 8f60cb
# ----------------------------------------------------------------------
Alain Reguera Delgado 8f60cb
Alain Reguera Delgado 8f60cb
# xhtml_doToc -- Rewrite the heading anchors of ${FILE} and expand its
# table of contents in place.
#
# Globals read:    FILE (page to process), TUNEUP_CONFIG_DIR (directory
#                  holding the toc.awk script).
# Globals written: TOC (the generated table of contents markup).
# Side effects:    ${FILE} is edited in place through `sed -i'.
# Returns:         0, leaving ${FILE} untouched, when file(1) does not
#                  describe it as XHTML, HTML or XML; otherwise the exit
#                  status of the last sed invocation.
#
# NOTE(review): the literal HTML markup used below (the three anchor
# forms, the closing </a>, the <div class="toc"> container and the <p>
# title) is inferred from the description in the file header -- confirm
# it against the upstream copy of this module.
function xhtml_doToc {

    # Define variables as local to avoid conflicts outside.
    local COUNT=0
    local PREVCOUNT=0
    local HEADING=''
    local TOCENTRY=''
    local TOC_LINE=''
    local SED_SEARCH=''
    local SED_REPLACE=''
    local -a HEADINGS=()
    local -a FIRST
    local -a FINAL
    local -a TITLE
    local -a MD5SM
    local -a OPTNS
    local -a CLASS
    local -a LEVEL
    local -a PARENT
    local -a TOCENTRIES
    local -a LINK

    # Define table of content configuration file, the file used to
    # produce the table of content XHTML output code.
    local TOC_CONFIG=${TUNEUP_CONFIG_DIR}/toc.awk

    # Verify table of content configuration file.
    cli_checkFiles -e "${TOC_CONFIG}"

    # Define html heading regular expression pattern. Use parentheses
    # to save the heading level (1), the heading attributes (2), the
    # opening anchor tag (3) and the heading title (4).
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</a></h[1-6]>'

    # Regular expressions are kept in variables and expanded unquoted
    # on the right-hand side of `=~': a quoted right-hand side is
    # compared as a literal string, not as a regular expression.
    local FILE_TYPE_PATTERN='^(XHTML|HTML|XML)'
    local ANCHOR_BOTH='^<a (href|name)="(.*)" (href|name)="(.*)">$'
    local ANCHOR_NAME='^<a name="(.*)">$'
    local ANCHOR_HREF='^<a href="(.*)">$'

    # Verify the file type. Is the file really an html file? If it
    # isn't, leave it untouched. `return' is used because there is no
    # enclosing loop inside this function for `continue' to act on.
    if [[ ! $(file --brief "${FILE}") =~ ${FILE_TYPE_PATTERN} ]]; then
        return 0
    fi

    # Define list of headings to process, one heading per line with
    # leading white space removed. Lines are read whole, so spaces
    # inside heading strings need no special encoding, and the list is
    # collected before the file is modified below.
    while IFS= read -r HEADING; do
        HEADINGS[${#HEADINGS[@]}]=${HEADING}
    done < <(grep -E -- "${PATTERN}" "${FILE}" \
        | sed -r 's!^[[:space:]]+!!')

    for HEADING in "${HEADINGS[@]}"; do

        # Split the heading into its components. Lines that do not
        # match are skipped without consuming a counter value.
        if [[ ! ${HEADING} =~ ${PATTERN} ]]; then
            continue
        fi

        # Define previous counter value using current counter
        # value as reference.
        if [[ ${COUNT} -ne 0 ]]; then
            PREVCOUNT=$((COUNT - 1))
        fi

        # Define initial heading information.
        FIRST[$COUNT]=${BASH_REMATCH[0]}
        LEVEL[$COUNT]=${BASH_REMATCH[1]}
        CLASS[$COUNT]=${BASH_REMATCH[2]}
        OPTNS[$COUNT]=${BASH_REMATCH[3]}
        TITLE[$COUNT]=${BASH_REMATCH[4]}
        MD5SM[$COUNT]=$(printf '%s\n' "${FILE}${FIRST[$COUNT]}" | md5sum)
        # Keep the digest only; md5sum appends the input name (`-').
        MD5SM[$COUNT]=${MD5SM[$COUNT]%%[[:space:]]*}
        PARENT[$COUNT]=${LEVEL[$PREVCOUNT]}

        # Transform heading information using initial heading
        # information as reference. Anchors that match none of the
        # three known forms are left unchanged.
        if [[ ${OPTNS[$COUNT]} =~ ${ANCHOR_BOTH} ]]; then
            OPTNS[$COUNT]='<a name="head-'${MD5SM[$COUNT]}'" href="#head-'${MD5SM[$COUNT]}'">'
        elif [[ ${OPTNS[$COUNT]} =~ ${ANCHOR_NAME} ]]; then
            OPTNS[$COUNT]='<a name="head-'${MD5SM[$COUNT]}'">'
        elif [[ ${OPTNS[$COUNT]} =~ ${ANCHOR_HREF} ]]; then
            OPTNS[$COUNT]='<a href="#head-'${MD5SM[$COUNT]}'">'
        fi

        # Build final html heading structure.
        FINAL[$COUNT]='<h'${LEVEL[$COUNT]}${CLASS[$COUNT]}'>'${OPTNS[$COUNT]}${TITLE[$COUNT]}'</a></h'${LEVEL[$COUNT]}'>'

        # Build html heading link structure. These links are used by
        # the table of contents later.
        LINK[$COUNT]='<a href="#head-'${MD5SM[$COUNT]}'">'${TITLE[$COUNT]}'</a>'

        # Build table of contents entry with numerical
        # identifications (counter:level:parent level:link). The
        # numerical identification is what toc.awk receives to place
        # each heading link in the table of contents.
        TOCENTRIES[$COUNT]="${COUNT}:${LEVEL[$COUNT]}:${PARENT[$COUNT]}:${LINK[$COUNT]}"

        # Update heading information inside the current file being
        # processed. The heading text is arbitrary page content, so
        # regular expression metacharacters and the `!' delimiter are
        # escaped in the search string, and `\', `&' and `!' are
        # escaped in the replacement string.
        SED_SEARCH=$(printf '%s\n' "${FIRST[$COUNT]}" \
            | sed -e 's/[][\\.^$*+?(){}|!]/\\&/g')
        SED_REPLACE=$(printf '%s\n' "${FINAL[$COUNT]}" \
            | sed -e 's/[\\&!]/\\&/g')
        sed -i -r -e "s!${SED_SEARCH}!${SED_REPLACE}!" -- "${FILE}"

        # Increase heading counter.
        COUNT=$((COUNT + 1))

    done

    # Build the table of contents using heading numerical
    # identifications as reference. Only the entries are piped through
    # awk; the container opening tag and the title are emitted
    # directly. Finally, the result is stored in the TOC variable.
    # NOTE(review): toc.awk is presumably responsible for the list
    # markup and the closing </div> -- confirm against that script.
    TOC=$(
        echo '<div class="toc">'
        echo "<p>$(gettext "Table of contents")</p>"
        for TOCENTRY in "${TOCENTRIES[@]}"; do
            printf '%s\n' "${TOCENTRY}"
        done | awk -f "${TOC_CONFIG}"
    )

    # Fold the table of contents into a single line. ${TOC} is left
    # unquoted on purpose: word splitting is what joins its lines, and
    # the `c' command below needs its text on one line.
    # shellcheck disable=SC2086
    TOC_LINE=$(echo -e ${TOC})

    # Update table of contents inside the current file being
    # processed.
    # NOTE(review): `[^<\/div]' is a bracket expression, so a container
    # whose first inner character is `<' (e.g., a completely empty one)
    # is not matched -- confirm this is intended.
    sed -i -r '/<div class="toc">[^<\/div].*<\/div>/c'"${TOC_LINE}" -- "${FILE}"

}