Blame tcar-scripts-tuneup/modules/xhtml/xhtml_setToc.sh

Alain Reguera Delgado b29c5b
#!/bin/bash
Alain Reguera Delgado b29c5b
###################################################################### 
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
#   xhtml_setToc.sh -- This functionality transforms web page headings
Alain Reguera Delgado b29c5b
#   to make them accessible through a table of contents.  The table of
Alain Reguera Delgado b29c5b
#   contents is expanded in place, wherever the <div
Alain Reguera Delgado b29c5b
#   class="toc"></div> piece of code is in the page.  Once the <div
Alain Reguera Delgado b29c5b
#   class="toc"></div> piece of code has been expanded, there is no need
Alain Reguera Delgado b29c5b
#   to put anything else in the page.
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
#   In order for the tuneup functionality to transform headings, you
Alain Reguera Delgado b29c5b
#   need to put headings in just one line using one of the following
Alain Reguera Delgado b29c5b
#   forms:
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
#   <h1><a name="">Title</a></h1>
Alain Reguera Delgado b29c5b
#   <h1><a href="">Title</a></h1>
Alain Reguera Delgado b29c5b
#   <h1><a name="" href="">Title</a></h1>
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
#   In the example above, h1 can vary from h1 to h6. Closing tag must
Alain Reguera Delgado b29c5b
#   be present and also match the opening tag. The value of `name'
Alain Reguera Delgado b29c5b
#   and `href' options from the anchor element are set dynamically
Alain Reguera Delgado b29c5b
#   using the md5sum output of combining the page location, the head-
Alain Reguera Delgado b29c5b
#   string and the heading string.
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
#   Written by:
Alain Reguera Delgado b29c5b
#   * Alain Reguera Delgado <al@centos.org.cu>, 2009-2013
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
# Copyright (C) 2009-2013 The CentOS Artwork SIG
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
# This program is free software; you can redistribute it and/or modify
Alain Reguera Delgado b29c5b
# it under the terms of the GNU General Public License as published by
Alain Reguera Delgado b29c5b
# the Free Software Foundation; either version 2 of the License, or (at
Alain Reguera Delgado b29c5b
# your option) any later version.
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
# This program is distributed in the hope that it will be useful, but
Alain Reguera Delgado b29c5b
# WITHOUT ANY WARRANTY; without even the implied warranty of
Alain Reguera Delgado b29c5b
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Alain Reguera Delgado b29c5b
# General Public License for more details.
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
# You should have received a copy of the GNU General Public License
Alain Reguera Delgado b29c5b
# along with this program; if not, write to the Free Software
Alain Reguera Delgado b29c5b
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Alain Reguera Delgado b29c5b
#
Alain Reguera Delgado b29c5b
######################################################################
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
# xhtml_setToc -- Rewrite the headings of one XHTML file and expand its
# table of contents in place.
#
# Reads (not set in this function): FILE, TCAR_MODULE_DIR_CONFIGS.
# Calls: tcar_checkFiles (defined elsewhere), file, egrep, sed, md5sum,
# gettext and awk.
# Side effects: edits ${FILE} in place with `sed -i'. The names FIRST,
# HEADING and TOC are assigned without `local', so they are not
# confined to this function.
function xhtml_setToc {
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Define variables as local to avoid conflicts outside.
    # NOTE(review): FIRST is used as an array in the loop below but is
    # neither declared here nor unset at the end -- confirm whether that
    # is intended.
Alain Reguera Delgado b29c5b
    local COUNT=0
Alain Reguera Delgado b29c5b
    local PREVCOUNT=0
Alain Reguera Delgado b29c5b
    local -a FINAL
Alain Reguera Delgado b29c5b
    local -a TITLE
Alain Reguera Delgado b29c5b
    local -a MD5SM
Alain Reguera Delgado b29c5b
    local -a OPTNS
Alain Reguera Delgado b29c5b
    local -a CLASS
Alain Reguera Delgado b29c5b
    local -a LEVEL
Alain Reguera Delgado b29c5b
    local -a PARENT
Alain Reguera Delgado b29c5b
    local -a TOCENTRIES
Alain Reguera Delgado b29c5b
    local -a LINK
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Define table of content configuration file, the awk program used
Alain Reguera Delgado b29c5b
    # to produce the table of content XHTML output code.
Alain Reguera Delgado b29c5b
    local TOC_CONFIG=${TCAR_MODULE_DIR_CONFIGS}/toc.awk
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Verify table of content configuration file.
Alain Reguera Delgado b29c5b
    tcar_checkFiles -ef ${TOC_CONFIG}
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Define html heading regular expression pattern. Use parentheses
Alain Reguera Delgado b29c5b
    # to save four parts of a one-line heading: \1 the heading level
    # (1-6), \2 whatever follows the level inside the opening tag, \3
    # the anchor opening tag, and \4 the heading title.
Alain Reguera Delgado b29c5b
    local PATTERN='<h([1-6])(.*)>(<a.*[^\>]>)(.*[^<])</h[1-6]>'
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Verify list of html files. Are files really html files? If they
Alain Reguera Delgado b29c5b
    # aren't, continue with the next one in the list.
    # NOTE(review): `continue' here is not inside any loop of this
    # function, so it can only act on a loop in the caller. Bash 4.4 and
    # later reset the loop state when a function is entered (see the
    # compat43 shell option), in which case nothing would be skipped --
    # confirm, and consider `return' instead.
    # NOTE(review): the right-hand side of `=~' is quoted; bash 3.2 and
    # later match a quoted pattern as a literal string (unless compat31
    # is set), so this test may never match as a regular expression --
    # confirm.
Alain Reguera Delgado b29c5b
    if [[ ! $(file --brief ${FILE}) =~ '^(XHTML|HTML|XML)' ]];then
Alain Reguera Delgado b29c5b
        continue
Alain Reguera Delgado b29c5b
    fi
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Define list of headings to process. When building the heading,
Alain Reguera Delgado b29c5b
    # it is required to change space characters from their current
Alain Reguera Delgado b29c5b
    # form to something different (here, the literal text \040, the
Alain Reguera Delgado b29c5b
    # octal code of a space). This is required because the unquoted
Alain Reguera Delgado b29c5b
    # command substitution below is split into words on whitespace by
Alain Reguera Delgado b29c5b
    # the shell, and spaces can be present inside heading strings we
    # don't want to separate. Leading whitespace is removed first.
Alain Reguera Delgado b29c5b
    for HEADING in $(egrep "${PATTERN}" ${FILE} \
Alain Reguera Delgado b29c5b
        | sed -r -e 's!^[[:space:]]+!!' -e "s! !\\\040!g");do
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Define previous counter value using current counter
Alain Reguera Delgado b29c5b
        # value as reference. For the first heading both counters are
        # 0, so the first heading is its own reference below.
Alain Reguera Delgado b29c5b
        if [[ ${COUNT} -ne 0 ]];then
Alain Reguera Delgado b29c5b
            PREVCOUNT=$((${COUNT}-1))
Alain Reguera Delgado b29c5b
        fi
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Define initial heading information. FIRST is the heading line
        # with the \040 markers turned back into spaces; the remaining
        # fields are the PATTERN groups taken from it. PARENT is the
        # level of the previous heading.
        # NOTE(review): MD5SM is not read anywhere else in this function
        # as listed here -- confirm against the repository copy.
Alain Reguera Delgado b29c5b
        FIRST[${COUNT}]=$(echo "${HEADING}" | sed -r "s!\\\040! !g")
Alain Reguera Delgado b29c5b
        TITLE[${COUNT}]=$(echo "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\4!")
Alain Reguera Delgado b29c5b
        MD5SM[${COUNT}]=$(echo "${TITLE[${COUNT}]}" | md5sum | sed -r 's![[:space:]]+-$!!')
Alain Reguera Delgado b29c5b
        OPTNS[${COUNT}]=$(echo "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\3!")
Alain Reguera Delgado b29c5b
        CLASS[${COUNT}]=$(echo "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\2!")
Alain Reguera Delgado b29c5b
        LEVEL[${COUNT}]=$(echo "${FIRST[${COUNT}]}" | sed -r "s!${PATTERN}!\1!")
Alain Reguera Delgado b29c5b
        PARENT[${COUNT}]=${LEVEL[${PREVCOUNT}]}
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Transform heading information using initial heading
Alain Reguera Delgado b29c5b
        # information as reference.
        # NOTE(review): as listed here the three conditions are
        # identical and every branch assigns an empty string, so the
        # two `elif' branches can never run. This looks like markup
        # lost from the listing -- confirm against the repository copy.
Alain Reguera Delgado b29c5b
        if [[ ${OPTNS[${COUNT}]} =~ '^$' ]];then
Alain Reguera Delgado b29c5b
            OPTNS[${COUNT}]=''
Alain Reguera Delgado b29c5b
        elif [[ ${OPTNS[${COUNT}]} =~ '^$' ]];then 
Alain Reguera Delgado b29c5b
            OPTNS[${COUNT}]=''
Alain Reguera Delgado b29c5b
        elif [[ ${OPTNS[${COUNT}]} =~ '^$' ]];then
Alain Reguera Delgado b29c5b
            OPTNS[${COUNT}]=''
Alain Reguera Delgado b29c5b
        fi
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Build final html heading structure.
Alain Reguera Delgado b29c5b
        FINAL[${COUNT}]='<h'${LEVEL[${COUNT}]}${CLASS[${COUNT}]}'>'${OPTNS[${COUNT}]}${TITLE[${COUNT}]}'</h'${LEVEL[${COUNT}]}'>'
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Build html heading link structure. These links are used by
Alain Reguera Delgado b29c5b
        # the table of contents later.
        # NOTE(review): as listed here the value is just the title
        # between two empty quoted strings; the anchor markup may be
        # missing from this listing -- confirm.
Alain Reguera Delgado b29c5b
        LINK[${COUNT}]=''${TITLE[${COUNT}]}''
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Build table of contents entry with numerical
Alain Reguera Delgado b29c5b
        # identifications. The numerical identification is what we use
Alain Reguera Delgado b29c5b
        # to determine the correct position of each heading link on
Alain Reguera Delgado b29c5b
        # the table of content. Fields are colon-separated:
        # position:level:previous-level:link.
Alain Reguera Delgado b29c5b
        TOCENTRIES[${COUNT}]="${COUNT}:${LEVEL[${COUNT}]}:${PARENT[${COUNT}]}:${LINK[${COUNT}]}"
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Update heading information inside the current file being
Alain Reguera Delgado b29c5b
        # processed. Use the first and final heading information. The
        # original heading text is used as the sed regular expression,
        # with `!' as the delimiter.
Alain Reguera Delgado b29c5b
        sed -i -r "s!${FIRST[${COUNT}]}!${FINAL[${COUNT}]}!" ${FILE}
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
        # Increase heading counter.
Alain Reguera Delgado b29c5b
        COUNT=$((${COUNT} + 1))
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    done
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Build the table of contents using heading numerical
Alain Reguera Delgado b29c5b
    # identifications as reference. The numerical identification
Alain Reguera Delgado b29c5b
    # describes the order of headings in one xhtml file. This
Alain Reguera Delgado b29c5b
    # information is processed by awk to make the appropriate
Alain Reguera Delgado b29c5b
    # replacements. Finally, the result is stored in the TOC
Alain Reguera Delgado b29c5b
    # variable (which is not declared local).
Alain Reguera Delgado b29c5b
    TOC=$(echo '
'
Alain Reguera Delgado b29c5b
        echo "

`gettext "Table of contents"`

"
Alain Reguera Delgado b29c5b
        for TOCENTRY in "${TOCENTRIES[@]}";do
Alain Reguera Delgado b29c5b
            echo ${TOCENTRY}
Alain Reguera Delgado b29c5b
        done \
Alain Reguera Delgado b29c5b
            | awk -f ${TOC_CONFIG})
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Update table of contents inside the current file being
Alain Reguera Delgado b29c5b
    # processed. The sed `c' command replaces each matching line with
    # the expanded TOC text.
    # NOTE(review): as listed here the address pattern starts with a
    # line break inside the quotes; part of it may be missing from this
    # listing -- confirm against the repository copy.
Alain Reguera Delgado b29c5b
    sed -i -r '/
[^<\/div].*<\/div>/c'"$(echo -e ${TOC})" ${FILE}
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
    # Clean up variables to receive the next file.
Alain Reguera Delgado b29c5b
    unset FINAL
Alain Reguera Delgado b29c5b
    unset TITLE
Alain Reguera Delgado b29c5b
    unset MD5SM
Alain Reguera Delgado b29c5b
    unset OPTNS
Alain Reguera Delgado b29c5b
    unset CLASS
Alain Reguera Delgado b29c5b
    unset LEVEL
Alain Reguera Delgado b29c5b
    unset PARENT
Alain Reguera Delgado b29c5b
    unset TOCENTRIES
Alain Reguera Delgado b29c5b
    unset LINK
Alain Reguera Delgado b29c5b
Alain Reguera Delgado b29c5b
}